1use std::collections::{HashMap, HashSet};
2use std::fmt;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use futures::stream::{FuturesUnordered, StreamExt};
7use neo4rs::{query, Graph};
8use thiserror::Error;
9use tokio::sync::Mutex;
10use tree_sitter::{Node, Tree};
11
12use crate::compress::{compress_full_source, compress_snippet, CompressorClient};
13use crate::go_resolve::{
14 discover_go_modules, discover_go_replaces, is_likely_third_party_go_import,
15 resolve_go_import_to_known_go_file, GoModule, GoReplace,
16};
17use crate::python_common_external::is_python_common_external_top_level;
18use crate::go_stdlib::is_go_stdlib_import;
19use crate::ir::{
20 api_endpoint_key, external_api_key, module_key, ClassIr, EdgeIr,
21 EdgeKind, FunctionIr, ProjectIr, PropertyIr,
22};
23use crate::python_stdlib::is_python_stdlib_top_level;
24use crate::schema::props;
25use crate::scanner::ParsedFile;
26use crate::LanguageId;
27
/// Connection settings for the Neo4j instance the graph is written to.
///
/// The three values are passed, in order, to `Graph::new` whenever a
/// connection is opened.
#[derive(Debug, Clone)]
pub struct Neo4jConfig {
    /// Server URI (first argument of `Graph::new`).
    pub uri: String,
    /// User name (second argument of `Graph::new`).
    pub user: String,
    /// Password (third argument of `Graph::new`).
    pub password: String,
}
38
/// Tunables that control how parsed files are persisted to the graph.
#[derive(Debug, Clone)]
pub struct GraphPersistenceOptions {
    /// Verbosity switch for import diagnostics.
    // NOTE(review): presumably forwarded to `should_log_unresolved_import` by the
    // language-specific persisters — confirm at the call sites.
    pub verbose_imports: bool,
    /// Cap on the parse warnings printed per file; `0` disables the cap
    /// (see `emit_limited_parse_warnings`).
    pub max_parse_warnings_per_file: usize,
    /// Compressor settings; a `CompressorClient` is only created when
    /// `compressor.enabled` is set.
    pub compressor: CompressorConfig,
}
49
50impl Default for GraphPersistenceOptions {
51 fn default() -> Self {
52 Self {
53 verbose_imports: false,
54 max_parse_warnings_per_file: 50,
55 compressor: CompressorConfig::default(),
56 }
57 }
58}
59
60pub use crate::compress::{CompressorConfig, DEFAULT_COMPRESSOR_URL};
61
/// The kinds of graph node distinguished by this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    /// A source file.
    File,
    /// A class-like declaration.
    Class,
    /// A function or method.
    Function,
}

/// Renders the variant name verbatim (`File`, `Class`, `Function`).
impl fmt::Display for NodeKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            NodeKind::File => "File",
            NodeKind::Class => "Class",
            NodeKind::Function => "Function",
        };
        f.write_str(label)
    }
}
82
/// A class-like declaration found in a source file.
// NOTE(review): presumably the element type of the class lists returned by the
// `extract_*_symbols` helpers — confirm against their signatures.
#[derive(Debug, Clone)]
struct ClassSymbol {
    /// Short (unqualified) name.
    name: String,
    /// Fully qualified name.
    fqn: String,
    /// Optional kind tag; `None` when the extractor did not classify the declaration.
    kind: Option<&'static str>,
}
94
/// A property declared on a class.
#[derive(Debug, Clone)]
struct PropertySymbol {
    /// Fully qualified name of the declaring class.
    class_fqn: String,
    /// Short (unqualified) property name.
    name: String,
    /// Fully qualified property name.
    fqn: String,
    /// Declared type as written in source, when one was captured.
    declared_type: Option<String>,
}
104
/// A function or method found in a source file.
#[derive(Debug, Clone)]
struct FunctionSymbol {
    /// Short (unqualified) name.
    name: String,
    /// Fully qualified name.
    fqn: String,
    /// Fully qualified name of the owning class; `None` for functions that are
    /// not attached to a class.
    class_fqn: Option<String>,
    /// Return type as text, when known.
    return_type: Option<String>,
    /// Parameter types as text.
    param_types: Vec<String>,
    /// Number of parameters.
    param_count: usize,
    /// Declaration modifiers.
    // NOTE(review): exact contents (e.g. visibility keywords) are not visible
    // in this part of the file — confirm against the extractors.
    modifiers: Vec<String>,
    /// Whether the receiver is a pointer.
    // NOTE(review): presumably only populated for Go methods — confirm.
    is_pointer_receiver: Option<bool>,
}
127
/// One callback required (or optionally allowed) by an Erlang behaviour.
///
/// Hashable and comparable, so contracts can be stored in sets.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CallbackContract {
    /// Name of the behaviour that defines the callback.
    behaviour: String,
    /// Callback function name.
    name: String,
    /// Callback arity.
    arity: u32,
    /// `true` when the callback is optional.
    optional: bool,
}
136
/// Per-module summary built by `build_erlang_module_index`.
#[derive(Debug, Default, Clone)]
struct ErlangModuleSnapshot {
    /// `(name, arity)` of every function extracted from the module.
    implemented_signatures: HashSet<(String, u32)>,
    /// `(name, arity)` of every callback contract collected for the module.
    callback_signatures: HashSet<(String, u32)>,
}
142
/// Behaviour-related facts extracted from a single Erlang syntax tree
/// (see `extract_erlang_behaviour_metadata_from_tree`).
#[derive(Debug, Default)]
struct ErlangBehaviourMetadata {
    /// Behaviours the module declares it uses.
    behaviour_usages: HashSet<String>,
    /// `(name, arity)` callbacks the module itself declares.
    declared_callbacks: Vec<(String, u32)>,
    /// `(name, arity)` callbacks marked optional.
    optional_callbacks: HashSet<(String, u32)>,
    /// Behaviours this module extends.
    // NOTE(review): not consumed anywhere in the visible part of the file —
    // confirm the exact meaning at the extractor.
    behaviour_extensions: HashSet<String>,
    /// `(name, arity)` callbacks this module overrides.
    // NOTE(review): same caveat as `behaviour_extensions`.
    overridden_callbacks: HashSet<(String, u32)>,
}
152
/// Errors surfaced by the Neo4j persistence layer.
#[derive(Debug, Error)]
pub enum GraphError {
    /// Any `neo4rs::Error`. Because of `#[from]`, `?` converts every driver
    /// error into this variant — failed queries included, not only failed
    /// connection attempts.
    #[error("neo4j connection error: {0}")]
    Connection(#[from] neo4rs::Error),
}
159
/// Number of pending entries in any single relationship set at which
/// `BatchAccumulator::should_flush` starts returning `true`.
const BATCH_FLUSH_THRESHOLD: usize = 3000;

/// Chunk size used for the batched C# node writes in `persist_csharp_structure`.
const CSHARP_NODE_BATCH_FLUSH_THRESHOLD: usize = 500;
166
/// Pending relationships, deduplicated in memory until they are written to
/// Neo4j in bulk by `flush`.
#[derive(Debug, Default)]
struct BatchAccumulator {
    /// `(caller_fqn, callee_fqn)` pairs written as `CALLS_FUNCTION`.
    calls_function: HashSet<(String, String)>,
    /// `(function_fqn, class_fqn)` pairs written as `USES_CLASS`.
    uses_class: HashSet<(String, String)>,
    /// `(class_fqn, class_fqn)` pairs written as `USES_CLASS` between two
    /// `Class` nodes.
    class_uses_class: HashSet<(String, String)>,
    /// `(function_fqn, base_url, norm_path)` triples written as
    /// `CALLS_EXTERNAL_API`.
    calls_external_api: HashSet<(String, String, String)>,
}
181
/// A `BatchAccumulator` behind an `Arc<Mutex<_>>` (the `tokio::sync::Mutex`
/// imported by this file) so several in-flight futures can add to the same batch.
type SharedBatchAccumulator = Arc<Mutex<BatchAccumulator>>;
183
184impl BatchAccumulator {
185 fn new() -> Self {
186 Self::default()
187 }
188
189 fn add_calls_function(&mut self, caller_fqn: String, callee_fqn: String) {
191 self.calls_function.insert((caller_fqn, callee_fqn));
192 }
193
194 fn add_uses_class(&mut self, fn_fqn: String, class_fqn: String) {
196 self.uses_class.insert((fn_fqn, class_fqn));
197 }
198
199 fn add_class_uses_class(&mut self, derived_fqn: String, base_fqn: String) {
201 self.class_uses_class.insert((derived_fqn, base_fqn));
202 }
203
204 fn add_calls_external_api(&mut self, fn_fqn: String, base_url: String, norm_path: String) {
206 self.calls_external_api.insert((fn_fqn, base_url, norm_path));
207 }
208
209 fn should_flush(&self) -> bool {
211 self.calls_function.len() >= BATCH_FLUSH_THRESHOLD
212 || self.uses_class.len() >= BATCH_FLUSH_THRESHOLD
213 || self.class_uses_class.len() >= BATCH_FLUSH_THRESHOLD
214 || self.calls_external_api.len() >= BATCH_FLUSH_THRESHOLD
215 }
216
217 fn total_size(&self) -> usize {
219 self.calls_function.len()
220 + self.uses_class.len()
221 + self.class_uses_class.len()
222 + self.calls_external_api.len()
223 }
224
225 async fn flush(&mut self, graph: &Graph) -> Result<(), GraphError> {
227 let total = self.total_size();
228 if total == 0 {
229 return Ok(());
230 }
231
232 println!("Neo4j: flushing {} accumulated relationships...", total);
233
234 if !self.calls_function.is_empty() {
236 let caller_fqns: Vec<String> = self.calls_function.iter().map(|(c, _)| c.clone()).collect();
237 let callee_fqns: Vec<String> = self.calls_function.iter().map(|(_, c)| c.clone()).collect();
238
239 let batch_query = query(
240 "
241 UNWIND range(0, size($caller_fqns) - 1) AS i
242 WITH $caller_fqns[i] AS caller_fqn, $callee_fqns[i] AS callee_fqn
243 MERGE (caller:Function { fqn: caller_fqn })
244 MERGE (callee:Function { fqn: callee_fqn })
245 MERGE (caller)-[:CALLS_FUNCTION]->(callee)
246 ",
247 )
248 .param("caller_fqns", caller_fqns)
249 .param("callee_fqns", callee_fqns);
250
251 graph.run(batch_query).await?;
252 self.calls_function.clear();
253 }
254
255 if !self.uses_class.is_empty() {
257 let fn_fqns: Vec<String> = self.uses_class.iter().map(|(f, _)| f.clone()).collect();
258 let cls_fqns: Vec<String> = self.uses_class.iter().map(|(_, c)| c.clone()).collect();
259
260 let batch_query = query(
261 "
262 UNWIND range(0, size($fn_fqns) - 1) AS i
263 WITH $fn_fqns[i] AS fn_fqn, $cls_fqns[i] AS cls_fqn
264 MERGE (fn:Function { fqn: fn_fqn })
265 MERGE (cls:Class { fqn: cls_fqn })
266 MERGE (fn)-[:USES_CLASS]->(cls)
267 ",
268 )
269 .param("fn_fqns", fn_fqns)
270 .param("cls_fqns", cls_fqns);
271
272 graph.run(batch_query).await?;
273 self.uses_class.clear();
274 }
275
276 if !self.class_uses_class.is_empty() {
278 let derived: Vec<String> = self.class_uses_class.iter().map(|(d, _)| d.clone()).collect();
279 let bases: Vec<String> = self.class_uses_class.iter().map(|(_, b)| b.clone()).collect();
280
281 let batch_query = query(
282 "
283 UNWIND range(0, size($derived_fqns) - 1) AS i
284 WITH $derived_fqns[i] AS derived_fqn, $base_fqns[i] AS base_fqn
285 MERGE (d:Class { fqn: derived_fqn })
286 MERGE (b:Class { fqn: base_fqn })
287 MERGE (d)-[:USES_CLASS]->(b)
288 ",
289 )
290 .param("derived_fqns", derived)
291 .param("base_fqns", bases);
292
293 graph.run(batch_query).await?;
294 self.class_uses_class.clear();
295 }
296
297 if !self.calls_external_api.is_empty() {
299 let fn_fqns: Vec<String> = self.calls_external_api.iter().map(|(f, _, _)| f.clone()).collect();
300 let base_urls: Vec<String> = self.calls_external_api.iter().map(|(_, b, _)| b.clone()).collect();
301 let norm_paths: Vec<String> = self.calls_external_api.iter().map(|(_, _, n)| n.clone()).collect();
302
303 let batch_query = query(
304 "
305 UNWIND range(0, size($fn_fqns) - 1) AS i
306 WITH $fn_fqns[i] AS fn_fqn, $base_urls[i] AS base_url, $norm_paths[i] AS norm_path
307 MERGE (fn:Function { fqn: fn_fqn })
308 MERGE (ext:ExternalApi { base_url: base_url, norm_path: norm_path })
309 MERGE (fn)-[:CALLS_EXTERNAL_API]->(ext)
310 ",
311 )
312 .param("fn_fqns", fn_fqns)
313 .param("base_urls", base_urls)
314 .param("norm_paths", norm_paths);
315
316 graph.run(batch_query).await?;
317 self.calls_external_api.clear();
318 }
319
320 Ok(())
321 }
322}
323
324async fn flush_shared_accumulator_if_needed(
325 shared_accumulator: &SharedBatchAccumulator,
326 graph: &Graph,
327) -> Result<(), GraphError> {
328 let mut local_batch = BatchAccumulator::new();
329 {
330 let mut guard = shared_accumulator.lock().await;
331 if !guard.should_flush() {
332 return Ok(());
333 }
334 std::mem::swap(&mut *guard, &mut local_batch);
335 }
336 local_batch.flush(graph).await
337}
338
339async fn flush_shared_accumulator_force(
340 shared_accumulator: &SharedBatchAccumulator,
341 graph: &Graph,
342) -> Result<(), GraphError> {
343 let mut local_batch = BatchAccumulator::new();
344 {
345 let mut guard = shared_accumulator.lock().await;
346 if guard.total_size() == 0 {
347 return Ok(());
348 }
349 std::mem::swap(&mut *guard, &mut local_batch);
350 }
351 local_batch.flush(graph).await
352}
353
/// Number of queued Erlang persistence futures at which
/// `persist_files_to_neo4j` awaits one of them before continuing.
const MAX_CONCURRENT_ERLANG_WRITES: usize = 8;
358
/// Expresses `file_path` relative to the repository `root`.
///
/// A relative `file_path` is interpreted as relative to `root`. Resolution is
/// attempted in this order:
/// 1. canonicalised file against canonicalised root (ignored when the result
///    would be empty, i.e. the file *is* the root),
/// 2. purely lexical strip of `root` from the joined path,
/// 3. lexical strip of the canonicalised root from `file_path` as given,
/// 4. `file_path` unchanged when nothing matched.
///
/// Canonicalisation failures (for example when the path does not exist) are
/// not errors; the un-canonicalised path is used instead.
fn repo_relative_file_path(root: &Path, file_path: &Path) -> PathBuf {
    // `Path::join` already returns `file_path` unchanged when it is absolute,
    // so no explicit `is_absolute` branch is needed.
    let joined = root.join(file_path);
    let canonical_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    let canonical_file = joined.canonicalize().unwrap_or_else(|_| joined.clone());

    // The former `file_path.strip_prefix(root)` fallback was unreachable: for
    // an absolute `file_path` it repeats the `joined` check, and for a relative
    // one the `joined` check always succeeds first.
    // NOTE(review): a relative `file_path` that is already prefixed with a
    // relative `root` is therefore still treated as root-relative — confirm
    // callers never pass such paths.
    let relative = canonical_file
        .strip_prefix(&canonical_root)
        .ok()
        .filter(|rel| !rel.as_os_str().is_empty())
        .or_else(|| joined.strip_prefix(root).ok())
        .or_else(|| file_path.strip_prefix(&canonical_root).ok())
        .unwrap_or(file_path);
    relative.to_path_buf()
}
387
388fn neo4j_path_string(root: &Path, file_path: &Path) -> String {
390 path_str_slash(&repo_relative_file_path(root, file_path))
391}
392
/// Renders a path as text (lossily for non-UTF-8 data) with every backslash
/// turned into a forward slash.
fn path_str_slash(p: &Path) -> String {
    p.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
396
397pub(crate) fn derive_project_name(file_path: &Path, root: &Path) -> Option<String> {
399 use std::path::Component;
400 let rel = repo_relative_file_path(root, file_path);
401 for c in rel.components() {
402 if let Component::Normal(s) = c {
403 return s.to_str().map(|x| x.to_string());
404 }
405 }
406 None
407}
408
409fn build_erlang_module_index(files: &[ParsedFile]) -> HashMap<String, ErlangModuleSnapshot> {
410 let mut index: HashMap<String, ErlangModuleSnapshot> = HashMap::new();
411
412 for file in files {
413 if file.language != LanguageId::Erlang {
414 continue;
415 }
416
417 let Some(module_name) = resolve_erlang_module_name(&file.path, &file.tree, &file.source) else {
418 continue;
419 };
420
421 let functions = extract_erlang_functions(&module_name, &file.tree, &file.source);
422 let meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, &file.source);
423 let contracts = collect_callback_contracts_for_module(
424 Some(module_name.as_str()),
425 &meta.behaviour_usages,
426 &meta.declared_callbacks,
427 &meta.optional_callbacks,
428 );
429
430 let snapshot = index.entry(module_name).or_default();
431 for (name, arity, _fqn) in functions {
432 snapshot.implemented_signatures.insert((name, arity));
433 }
434 for contract in contracts {
435 snapshot
436 .callback_signatures
437 .insert((contract.name, contract.arity));
438 }
439 }
440
441 index
442}
443
444pub fn append_csharp_structural_ir(
447 ir: &mut ProjectIr,
448 file_path: &str,
449 project_name: Option<String>,
450 tree: &Tree,
451 source: &str,
452) {
453 let (classes, _, properties) = extract_csharp_symbols(tree, source);
454 let language = LanguageId::CSharp.to_string();
455 for c in classes {
456 ir.classes.push(ClassIr {
457 fqn: c.fqn.clone(),
458 name: c.name,
459 path: file_path.to_string(),
460 language: language.clone(),
461 project_name: project_name.clone(),
462 kind: c.kind.map(str::to_string),
463 code_bytes: None,
464 });
465 ir.edges.push(EdgeIr {
466 kind: EdgeKind::DeclaresClass,
467 from_label: String::from("File"),
468 from_key: file_path.to_string(),
469 to_label: String::from("Class"),
470 to_key: c.fqn,
471 });
472 }
473 for p in properties {
474 ir.properties.push(PropertyIr {
475 fqn: p.fqn.clone(),
476 name: p.name,
477 class_fqn: p.class_fqn.clone(),
478 path: file_path.to_string(),
479 language: language.clone(),
480 project_name: project_name.clone(),
481 declared_type: p.declared_type,
482 code_bytes: None,
483 });
484 ir.edges.push(EdgeIr {
485 kind: EdgeKind::DeclaresProperty,
486 from_label: String::from("Class"),
487 from_key: p.class_fqn,
488 to_label: String::from("Property"),
489 to_key: p.fqn,
490 });
491 }
492}
493
494pub fn append_java_class_ir(
496 ir: &mut ProjectIr,
497 file_path: &str,
498 project_name: Option<String>,
499 tree: &Tree,
500 source: &str,
501) {
502 let package = extract_java_package(source);
503 let (classes, _) = extract_java_symbols(tree, source, package.as_deref());
504 let language = LanguageId::Java.to_string();
505 for c in classes {
506 ir.classes.push(ClassIr {
507 fqn: c.fqn.clone(),
508 name: c.name,
509 path: file_path.to_string(),
510 language: language.clone(),
511 project_name: project_name.clone(),
512 kind: c.kind.map(str::to_string),
513 code_bytes: None,
514 });
515 ir.edges.push(EdgeIr {
516 kind: EdgeKind::DeclaresClass,
517 from_label: String::from("File"),
518 from_key: file_path.to_string(),
519 to_label: String::from("Class"),
520 to_key: c.fqn,
521 });
522 }
523}
524
525pub async fn cleanup_incremental_targets_in_neo4j(
538 cfg: &Neo4jConfig,
539 root: &Path,
540 cleanup_targets: &[String],
541) -> Result<(), GraphError> {
542 if cleanup_targets.is_empty() {
543 println!("Neo4j cleanup: no cleanup targets, skipping.");
544 return Ok(());
545 }
546
547 let normalized_paths: Vec<String> = cleanup_targets
548 .iter()
549 .map(|target| neo4j_path_string(root, Path::new(target)))
550 .collect::<HashSet<_>>()
551 .into_iter()
552 .collect();
553
554 if normalized_paths.is_empty() {
555 println!("Neo4j cleanup: no normalized paths, skipping.");
556 return Ok(());
557 }
558
559 println!(
560 "Neo4j cleanup: deleting stale graph scope for {} path(s)...",
561 normalized_paths.len()
562 );
563
564 let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
565
566 let delete_files = query(
567 "
568 UNWIND $paths AS path
569 OPTIONAL MATCH (f:File { path: path })
570 DETACH DELETE f
571 ",
572 )
573 .param("paths", normalized_paths.clone());
574 graph.run(delete_files).await?;
575
576 let delete_modules = query(
577 "
578 UNWIND $paths AS path
579 OPTIONAL MATCH (m:Module { path: path })
580 DETACH DELETE m
581 ",
582 )
583 .param("paths", normalized_paths.clone());
584 graph.run(delete_modules).await?;
585
586 let delete_classes = query(
587 "
588 UNWIND $paths AS path
589 OPTIONAL MATCH (c:Class { path: path })
590 DETACH DELETE c
591 ",
592 )
593 .param("paths", normalized_paths.clone());
594 graph.run(delete_classes).await?;
595
596 let delete_functions = query(
597 "
598 UNWIND $paths AS path
599 OPTIONAL MATCH (fn:Function { path: path })
600 DETACH DELETE fn
601 ",
602 )
603 .param("paths", normalized_paths);
604 graph.run(delete_functions).await?;
605
606 println!("Neo4j cleanup: stale graph scope deleted.");
607 Ok(())
608}
609
/// Decides whether parse warnings should be printed for `file_path`.
///
/// Returns `false` when the path — compared case-insensitively and with
/// backslashes treated as forward slashes — contains `/vendordocs/` or
/// `/refund_issue/`. The surrounding slashes are part of the match, so a
/// path that merely *starts* with `vendordocs/` is not silenced.
fn should_emit_parse_warnings_for_path(file_path: &str) -> bool {
    const SILENCED_SEGMENTS: [&str; 2] = ["/vendordocs/", "/refund_issue/"];

    let normalized = file_path.replace('\\', "/").to_lowercase();
    !SILENCED_SEGMENTS
        .iter()
        .any(|&segment| normalized.contains(segment))
}
648
649fn emit_limited_parse_warnings(
650 label: &str,
651 file_path: &str,
652 warnings: Vec<(usize, usize, String)>,
653 max_per_file: usize,
654) {
655 if !should_emit_parse_warnings_for_path(file_path) {
656 return;
657 }
658 let total = warnings.len();
659 if max_per_file == 0 {
660 for (line, col, snippet) in warnings {
661 println!(
662 "{label} parse warning {file_path}:{line}:{col} - {snippet}",
663 );
664 }
665 return;
666 }
667 for (i, (line, col, snippet)) in warnings.into_iter().enumerate() {
668 if i >= max_per_file {
669 let rest = total.saturating_sub(i);
670 if rest > 0 {
671 println!(
672 "{label} parse warning {file_path}: ... {rest} more suppressed (set max_parse_warnings_per_file to 0 for unlimited)",
673 );
674 }
675 break;
676 }
677 println!(
678 "{label} parse warning {file_path}:{line}:{col} - {snippet}",
679 );
680 }
681}
682
/// Decides whether an unresolved import is worth logging.
///
/// Verbose mode logs everything; otherwise only imports that are neither
/// standard-library nor third-party are logged.
fn should_log_unresolved_import(
    verbose_imports: bool,
    is_stdlib: bool,
    is_third_party: bool,
) -> bool {
    if verbose_imports {
        return true;
    }
    let expected_to_be_external = is_stdlib || is_third_party;
    !expected_to_be_external
}
690
/// Writes every parsed file, and the symbols and relationships extracted from
/// it, into Neo4j.
///
/// Steps, in order:
/// 1. open the connection and, when `clean` is set, delete every node;
/// 2. collect the repo-relative path of every file into `known_paths`;
/// 3. discover Go modules / `replace` directives and build the C# and Erlang
///    indexes (Go discovery failures are only reported on stderr);
/// 4. optionally create the compressor client (failures are reported on
///    stderr and disable compression instead of aborting);
/// 5. for each file: merge its `File` node, then dispatch to the
///    language-specific persister;
/// 6. drain the outstanding Erlang futures and flush both accumulators;
/// 7. link `ApiEndpoint` and `ExternalApi` nodes that share a `norm_path`
///    with `SAME_API`.
///
/// # Errors
/// Returns the first `GraphError` raised by the connection, a query, or a
/// language-specific persister. Work already written is not rolled back.
pub async fn persist_files_to_neo4j(
    cfg: &Neo4jConfig,
    root: &Path,
    files: &[ParsedFile],
    clean: bool,
    follow_symlinks: bool,
    persistence: &GraphPersistenceOptions,
) -> Result<(), GraphError> {
    let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;

    if clean {
        // Wipes the whole database, not only nodes created by this tool.
        println!("Neo4j: deleting all existing nodes and relationships...");
        let delete_query = query("MATCH (n) DETACH DELETE n");
        graph.run(delete_query).await?;
        println!("Neo4j: database cleaned, starting fresh graph construction");
    }

    // Repo-relative paths of all files in this run; handed to the persisters.
    let mut known_paths: HashSet<String> = HashSet::new();
    for file in files {
        known_paths.insert(neo4j_path_string(root, &file.path));
    }

    // Go discovery is best-effort: on failure continue with empty lists.
    let go_modules = discover_go_modules(root, follow_symlinks).unwrap_or_else(|e| {
        eprintln!("Neo4j: warning: could not discover go.mod modules: {e}");
        Vec::new()
    });

    let go_replaces = discover_go_replaces(root, follow_symlinks).unwrap_or_else(|e| {
        eprintln!("Neo4j: warning: could not discover go.mod replace directives: {e}");
        Vec::new()
    });

    let csharp_batch_index = build_csharp_batch_index(files, root);

    // A failed health check keeps the client; only a construction failure
    // leaves the run without a compressor.
    let compressor_client = if persistence.compressor.enabled {
        match CompressorClient::from_config(&persistence.compressor) {
            Ok(client) => {
                if let Err(e) = client.health_check().await {
                    eprintln!("RedCompressor: health check failed ({e}); compression may be unavailable");
                }
                Some(client)
            }
            Err(e) => {
                eprintln!("RedCompressor: failed to create client ({e}); skipping code_bytes");
                None
            }
        }
    } else {
        None
    };
    let compressor = compressor_client.as_ref();

    // Relationship batch for the languages that are persisted sequentially.
    let mut accumulator = BatchAccumulator::new();
    let erlang_module_index = build_erlang_module_index(files);

    let total_files = files.len();
    // Erlang files are queued as futures and only make progress while this
    // function polls `erlang_futures`; they share `erlang_accumulator`.
    // NOTE(review): if `persist_erlang_structure` ever holds the accumulator
    // lock across an `.await`, the `flush_shared_accumulator_*` calls below
    // could wait on a future that is not being polled — confirm it does not.
    let mut erlang_futures: FuturesUnordered<_> = FuturesUnordered::new();
    let erlang_accumulator: SharedBatchAccumulator =
        Arc::new(Mutex::new(BatchAccumulator::new()));

    for (idx, file) in files.iter().enumerate() {
        let language = file.language.to_string();
        let path = neo4j_path_string(root, &file.path);
        let project_name = derive_project_name(&file.path, root);

        println!(
            "Neo4j: processing file {}/{} ({})",
            idx + 1,
            total_files,
            path
        );

        // Upsert the File node; both branches set the same properties.
        let q = query(
            "
            MERGE (f:File { path: $path })
            ON CREATE SET f.language = $language,
            f.project_name = $project_name,
            f.is_test = $is_test
            ON MATCH SET f.language = $language,
            f.project_name = $project_name,
            f.is_test = $is_test
            ",
        )
        .param("path", path.clone())
        .param("language", language.clone())
        .param("project_name", project_name.clone())
        .param("is_test", file.is_test);

        graph.run(q).await?;

        let source = &file.source;
        match file.language {
            LanguageId::Java => {
                persist_java_structure(
                    &graph,
                    &path,
                    file,
                    source,
                    &known_paths,
                    project_name.clone(),
                    &mut accumulator,
                    persistence,
                    compressor,
                )
                .await?;
            }
            LanguageId::CSharp => {
                persist_csharp_structure(
                    &graph,
                    &path,
                    file,
                    source,
                    project_name.clone(),
                    &known_paths,
                    &csharp_batch_index,
                    &mut accumulator,
                    compressor,
                )
                .await?;
            }
            LanguageId::Erlang => {
                // Queued, not awaited: the future runs when the set is polled.
                erlang_futures.push(persist_erlang_structure(
                    &graph,
                    file,
                    path.clone(),
                    source,
                    project_name.clone(),
                    &erlang_module_index,
                    erlang_accumulator.clone(),
                    compressor,
                ));

                // Once the queue is full, wait for one future to finish
                // before accepting more work.
                if erlang_futures.len() >= MAX_CONCURRENT_ERLANG_WRITES {
                    if let Some(res) = erlang_futures.next().await {
                        res?;
                    }
                    flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
                }
            }
            LanguageId::Go => {
                persist_go_structure(
                    &graph,
                    &path,
                    file,
                    source,
                    &known_paths,
                    project_name.clone(),
                    &mut accumulator,
                    root,
                    &go_modules,
                    &go_replaces,
                    persistence,
                    compressor,
                )
                .await?;
            }
            _ => {
                // Every remaining language goes through the generic persister.
                persist_non_java_functions(
                    &graph,
                    &path,
                    file,
                    source,
                    project_name.clone(),
                    &known_paths,
                    &mut accumulator,
                    persistence,
                    compressor,
                )
                .await?;
            }
        }

        // Keep memory bounded: write out batches that reached the threshold.
        if accumulator.should_flush() {
            accumulator.flush(&graph).await?;
        }
        flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
    }

    // Drive the remaining Erlang futures to completion.
    while let Some(res) = erlang_futures.next().await {
        res?;
        flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
    }

    // Final flush of whatever is left below the threshold.
    accumulator.flush(&graph).await?;
    flush_shared_accumulator_force(&erlang_accumulator, &graph).await?;

    // Connect endpoints served by the project with external calls that
    // target the same normalised path.
    let same_api_query = query(
        "
        MATCH (ep:ApiEndpoint)
        MATCH (ext:ExternalApi)
        WHERE ep.norm_path IS NOT NULL
        AND ext.norm_path IS NOT NULL
        AND ep.norm_path = ext.norm_path
        MERGE (ep)-[:SAME_API]->(ext)
        ",
    );

    graph.run(same_api_query).await?;

    println!("Neo4j: finished processing {} files.", total_files);

    Ok(())
}
915
/// Persists the structure of one Java file.
///
/// Written immediately (one query each): `Class` and `Function` nodes with
/// their `DECLARES_*` edges, `DEPENDS_ON_FILE` edges for imports that map to
/// a known project file, Spring `ApiEndpoint` nodes with `HANDLED_BY` edges,
/// and `ExternalApi` nodes.
///
/// Queued on `accumulator` for a later batched write: class-to-class usage
/// (inheritance and injected dependencies), function calls, function-to-class
/// usage, and function-to-external-API calls.
///
/// The class and function queries start with `MATCH (f:File { path: $path })`,
/// so the `File` node for `file_path` must already exist; otherwise those
/// queries match nothing and create nothing.
///
/// # Errors
/// Returns the first `GraphError` raised by a query.
async fn persist_java_structure(
    graph: &Graph,
    file_path: &str,
    file: &ParsedFile,
    source: &str,
    known_paths: &HashSet<String>,
    project_name: Option<String>,
    accumulator: &mut BatchAccumulator,
    persistence: &GraphPersistenceOptions,
    compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
    let package = extract_java_package(source);
    let (classes, methods) = extract_java_symbols(&file.tree, source, package.as_deref());
    let class_spans = extract_java_class_spans(&file.tree, source, package.as_deref());
    let method_spans = extract_java_method_body_spans(&file.tree, source, package.as_deref());

    emit_limited_parse_warnings(
        "Java",
        file_path,
        extract_java_parse_warnings(&file.tree, source),
        persistence.max_parse_warnings_per_file,
    );

    // Annotation lists keyed by class / method FQN.
    let class_ann_map: HashMap<String, Vec<String>> =
        extract_java_class_annotations(&file.tree, source, package.as_deref())
            .into_iter()
            .collect();
    let method_ann_map: HashMap<String, Vec<String>> =
        extract_java_method_annotations(&file.tree, source, package.as_deref())
            .into_iter()
            .collect();

    for class in &classes {
        let annotations = class_ann_map
            .get(&class.fqn)
            .cloned()
            .unwrap_or_default();
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            class_spans.get(&class.fqn).copied(),
            LanguageId::Java,
        )
        .await;
        // On match, existing code_bytes are kept when no new value is supplied.
        let q = query(
            "
            MATCH (f:File { path: $path })
            MERGE (c:Class { fqn: $class_fqn })
            ON CREATE SET c.name = $class_name,
            c.path = $path,
            c.project_name = $project_name,
            c.annotations = $annotations,
            c.code_bytes = $code_bytes
            ON MATCH SET c.name = $class_name,
            c.path = $path,
            c.project_name = $project_name,
            c.annotations = $annotations,
            c.code_bytes = coalesce($code_bytes, c.code_bytes)
            MERGE (f)-[:DECLARES_CLASS]->(c)
            ",
        )
        .param("path", file_path.to_string())
        .param("class_fqn", class.fqn.clone())
        .param("class_name", class.name.clone())
        .param("project_name", project_name.clone())
        .param("annotations", annotations)
        .param(props::CODE_BYTES, code_bytes);

        graph.run(q).await?;
    }

    for func in &methods {
        let fn_annotations = method_ann_map
            .get(&func.fqn)
            .cloned()
            .unwrap_or_default();
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            method_spans.get(&func.fqn).copied(),
            LanguageId::Java,
        )
        .await;
        match &func.class_fqn {
            // Method of a class: declared by both the file and the class.
            Some(class_fqn) => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (cls:Class { fqn: $class_fqn })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.return_type = $return_type,
                    fn.param_types = $param_types,
                    fn.param_count = $param_count,
                    fn.annotations = $fn_annotations,
                    fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.return_type = coalesce($return_type, fn.return_type),
                    fn.param_types = coalesce($param_types, fn.param_types),
                    fn.param_count = coalesce($param_count, fn.param_count),
                    fn.annotations = $fn_annotations,
                    fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("class_fqn", class_fqn.clone())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("return_type", func.return_type.clone())
                .param("param_types", func.param_types.clone())
                .param("param_count", func.param_count as i64)
                .param("fn_annotations", fn_annotations)
                .param(props::CODE_BYTES, code_bytes.clone());

                graph.run(q).await?;
            }
            // No owning class: declared by the file only.
            None => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.return_type = $return_type,
                    fn.param_types = $param_types,
                    fn.param_count = $param_count,
                    fn.annotations = $fn_annotations,
                    fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.return_type = coalesce($return_type, fn.return_type),
                    fn.param_types = coalesce($param_types, fn.param_types),
                    fn.param_count = coalesce($param_count, fn.param_count),
                    fn.annotations = $fn_annotations,
                    fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("return_type", func.return_type.clone())
                .param("param_types", func.param_types.clone())
                .param("param_count", func.param_count as i64)
                .param("fn_annotations", fn_annotations)
                .param(props::CODE_BYTES, code_bytes);

                graph.run(q).await?;
            }
        }
    }

    // Inheritance and injected dependencies are both queued as
    // class-to-class usage.
    for (derived, base) in extract_java_inheritance_edges(&file.tree, source, package.as_deref()) {
        accumulator.add_class_uses_class(derived, base);
    }
    for (cls, dep) in extract_java_injected_dependencies(&file.tree, source, package.as_deref()) {
        accumulator.add_class_uses_class(cls, dep);
    }

    let calls = extract_java_calls(&file.tree, source, package.as_deref());
    for (caller_fqn, callee_fqn) in calls {
        accumulator.add_calls_function(caller_fqn, callee_fqn);
    }

    let internal_imports = extract_internal_java_imports(source);
    for import_fqn in internal_imports {
        if let Some(dep_path) = map_import_to_project_path(file_path, &import_fqn) {
            // Only link to files that are part of this run.
            if !known_paths.contains(&dep_path) {
                continue;
            }

            let dep_query = query(
                "
                MERGE (src:File { path: $src_path })
                MERGE (dst:File { path: $dst_path })
                MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
                ",
            )
            .param("src_path", file_path.to_string())
            .param("dst_path", dep_path.clone());

            graph.run(dep_query).await?;
        }
    }

    let endpoints = extract_java_spring_endpoints(source);
    for (http_methods, path_template, handler_name) in endpoints {
        let norm_path = normalize_api_path(&path_template);

        // The endpoint is keyed by its raw path template; on match only
        // `methods` is overwritten, the other properties keep existing values.
        let api_query = query(
            "
            MERGE (api:ApiEndpoint { path: $path })
            ON CREATE SET api.methods = $methods,
            api.protocol = 'http',
            api.framework = 'spring',
            api.project_name = $project_name,
            api.norm_path = $norm_path
            ON MATCH SET api.methods = $methods,
            api.protocol = coalesce(api.protocol, 'http'),
            api.framework = coalesce(api.framework, 'spring'),
            api.project_name = coalesce(api.project_name, $project_name),
            api.norm_path = coalesce(api.norm_path, $norm_path)
            ",
        )
        .param("path", path_template.clone())
        .param("methods", http_methods.clone())
        .param("project_name", project_name.clone())
        .param("norm_path", norm_path.clone());

        graph.run(api_query).await?;

        // Handlers are matched by simple name, so every method in this file
        // with that name (overloads included) is linked.
        for func in &methods {
            if func.name != handler_name {
                continue;
            }

            let rel_query = query(
                "
                MERGE (fn:Function { fqn: $fn_fqn })
                MERGE (api:ApiEndpoint { path: $path })
                MERGE (api)-[:HANDLED_BY]->(fn)
                ",
            )
            .param("fn_fqn", func.fqn.clone())
            .param("path", path_template.clone());

            graph.run(rel_query).await?;
        }
    }

    let external_urls = extract_external_http_urls(source);
    for full_url in external_urls {
        let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
        // URLs without an explicit scheme are recorded as plain http.
        let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
        let base_url = format!("{protocol}://{host}");
        let name = host.clone();
        let norm_path = normalize_api_path(&path);

        // The host doubles as both `name` and `provider`.
        let ext_query = query(
            "
            MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
            ON CREATE SET ext.name = $name,
            ext.path = $path,
            ext.protocol = $protocol,
            ext.provider = $provider,
            ext.project_name = $project_name
            ON MATCH SET ext.name = coalesce(ext.name, $name),
            ext.path = coalesce(ext.path, $path),
            ext.protocol = coalesce(ext.protocol, $protocol),
            ext.provider = coalesce(ext.provider, $provider),
            ext.project_name = coalesce(ext.project_name, $project_name)
            ",
        )
        .param("name", name.clone())
        .param("base_url", base_url.clone())
        .param("path", path.clone())
        .param("norm_path", norm_path.clone())
        .param("protocol", protocol.clone())
        .param("provider", name.clone())
        .param("project_name", project_name.clone());

        graph.run(ext_query).await?;

        // File-level association: every method in the file is linked to every
        // external URL found in the file, not only the methods that use it.
        for func in &methods {
            accumulator.add_calls_external_api(
                func.fqn.clone(),
                base_url.clone(),
                norm_path.clone(),
            );
        }
    }

    let used_classes = extract_java_used_classes(&file.tree, source, package.as_deref());
    for (fn_fqn, class_fqn) in used_classes {
        accumulator.add_uses_class(fn_fqn, class_fqn);
    }

    Ok(())
}
1234
1235async fn persist_csharp_structure(
1239 graph: &Graph,
1240 file_path: &str,
1241 file: &ParsedFile,
1242 source: &str,
1243 project_name: Option<String>,
1244 known_paths: &HashSet<String>,
1245 csharp_index: &CSharpBatchIndex,
1246 accumulator: &mut BatchAccumulator,
1247 compressor: Option<&CompressorClient>,
1248) -> Result<(), GraphError> {
1249 let language = file.language.to_string();
1250 let namespace = extract_csharp_namespace(&file.tree, source);
1251 let using_summary = extract_csharp_using_summary(&file.tree, source);
1252
1253 let (classes, methods, property_symbols) = extract_csharp_symbols(&file.tree, source);
1254 let class_spans = extract_csharp_class_spans(&file.tree, source);
1255 let property_spans = extract_csharp_property_spans(&file.tree, source);
1256 let method_spans = extract_csharp_method_body_spans_map(&file.tree, source, namespace.as_deref());
1257
1258 for chunk in classes.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
1260 let class_fqns: Vec<String> = chunk.iter().map(|c| c.fqn.clone()).collect();
1261 let class_names: Vec<String> = chunk.iter().map(|c| c.name.clone()).collect();
1262 let class_kinds: Vec<String> = chunk
1263 .iter()
1264 .map(|c| c.kind.unwrap_or("class").to_string())
1265 .collect();
1266 let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
1267 for c in chunk {
1268 code_bytes_list.push(
1269 code_bytes_for_span(
1270 compressor,
1271 source,
1272 class_spans.get(&c.fqn).copied(),
1273 LanguageId::CSharp,
1274 )
1275 .await,
1276 );
1277 }
1278 let q = query(
1279 "
1280 UNWIND range(0, size($class_fqns) - 1) AS i
1281 WITH $file_path AS path, $class_fqns[i] AS class_fqn, $class_names[i] AS class_name,
1282 $class_kinds[i] AS class_kind, $project_name AS project_name, $language AS language,
1283 $code_bytes_list[i] AS code_bytes
1284 MATCH (f:File { path: path })
1285 MERGE (c:Class { fqn: class_fqn })
1286 ON CREATE SET c.name = class_name, c.path = path, c.project_name = project_name,
1287 c.language = language, c.kind = class_kind, c.code_bytes = code_bytes
1288 ON MATCH SET c.name = class_name, c.project_name = project_name, c.language = language,
1289 c.kind = class_kind, c.code_bytes = coalesce(code_bytes, c.code_bytes)
1290 MERGE (f)-[:DECLARES_CLASS]->(c)
1291 ",
1292 )
1293 .param("file_path", file_path.to_string())
1294 .param("class_fqns", class_fqns)
1295 .param("class_names", class_names)
1296 .param("class_kinds", class_kinds)
1297 .param("project_name", project_name.clone())
1298 .param("language", language.clone())
1299 .param("code_bytes_list", code_bytes_list);
1300
1301 graph.run(q).await?;
1302 }
1303
1304 for chunk in property_symbols.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
1305 let class_fqns: Vec<String> = chunk.iter().map(|p| p.class_fqn.clone()).collect();
1306 let prop_fqns: Vec<String> = chunk.iter().map(|p| p.fqn.clone()).collect();
1307 let prop_names: Vec<String> = chunk.iter().map(|p| p.name.clone()).collect();
1308 let decl_types: Vec<Option<String>> = chunk.iter().map(|p| p.declared_type.clone()).collect();
1309 let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
1310 for p in chunk {
1311 code_bytes_list.push(
1312 code_bytes_for_span(
1313 compressor,
1314 source,
1315 property_spans.get(&p.fqn).copied(),
1316 LanguageId::CSharp,
1317 )
1318 .await,
1319 );
1320 }
1321 let q = query(
1322 "
1323 UNWIND range(0, size($prop_fqns) - 1) AS i
1324 WITH $class_fqns[i] AS class_fqn, $prop_fqns[i] AS prop_fqn, $prop_names[i] AS prop_name,
1325 $decl_types[i] AS declared_type, $file_path AS path,
1326 $project_name AS project_name, $language AS language,
1327 $code_bytes_list[i] AS code_bytes
1328 MATCH (c:Class { fqn: class_fqn })
1329 MERGE (p:Property { fqn: prop_fqn })
1330 ON CREATE SET p.name = prop_name, p.path = path, p.project_name = project_name,
1331 p.language = language, p.declared_type = declared_type, p.code_bytes = code_bytes
1332 ON MATCH SET p.name = prop_name, p.project_name = project_name, p.language = language,
1333 p.declared_type = coalesce(declared_type, p.declared_type),
1334 p.code_bytes = coalesce(code_bytes, p.code_bytes)
1335 MERGE (c)-[:DECLARES_PROPERTY]->(p)
1336 ",
1337 )
1338 .param("class_fqns", class_fqns)
1339 .param("prop_fqns", prop_fqns)
1340 .param("prop_names", prop_names)
1341 .param("decl_types", decl_types)
1342 .param("file_path", file_path.to_string())
1343 .param("project_name", project_name.clone())
1344 .param("language", language.clone())
1345 .param("code_bytes_list", code_bytes_list);
1346
1347 graph.run(q).await?;
1348 }
1349
1350 let methods_with_class: Vec<&FunctionSymbol> =
1351 methods.iter().filter(|f| f.class_fqn.is_some()).collect();
1352 for chunk in methods_with_class.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
1353 let class_fqns: Vec<String> = chunk
1354 .iter()
1355 .map(|f| f.class_fqn.clone().unwrap_or_default())
1356 .collect();
1357 let fn_fqns: Vec<String> = chunk.iter().map(|f| f.fqn.clone()).collect();
1358 let fn_names: Vec<String> = chunk.iter().map(|f| f.name.clone()).collect();
1359 let return_types: Vec<Option<String>> = chunk.iter().map(|f| f.return_type.clone()).collect();
1360 let param_types_list: Vec<Vec<String>> = chunk.iter().map(|f| f.param_types.clone()).collect();
1361 let param_counts: Vec<i64> = chunk.iter().map(|f| f.param_count as i64).collect();
1362 let modifiers_list: Vec<Vec<String>> = chunk.iter().map(|f| f.modifiers.clone()).collect();
1363 let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
1364 for f in chunk {
1365 code_bytes_list.push(
1366 code_bytes_for_span(
1367 compressor,
1368 source,
1369 method_spans.get(&f.fqn).copied(),
1370 LanguageId::CSharp,
1371 )
1372 .await,
1373 );
1374 }
1375 let q = query(
1376 "
1377 UNWIND range(0, size($fn_fqns) - 1) AS i
1378 WITH $file_path AS path, $class_fqns[i] AS class_fqn, $fn_fqns[i] AS fn_fqn,
1379 $fn_names[i] AS fn_name, $return_types[i] AS return_type,
1380 $param_types_list[i] AS param_types, $param_counts[i] AS param_count,
1381 $modifiers_list[i] AS modifiers, $project_name AS project_name, $language AS language,
1382 $code_bytes_list[i] AS code_bytes
1383 MATCH (f:File { path: path })
1384 MERGE (cls:Class { fqn: class_fqn })
1385 MERGE (fn:Function { fqn: fn_fqn })
1386 ON CREATE SET fn.name = fn_name, fn.path = path, fn.project_name = project_name,
1387 fn.language = language, fn.return_type = return_type,
1388 fn.param_types = param_types, fn.param_count = param_count,
1389 fn.modifiers = modifiers, fn.code_bytes = code_bytes
1390 ON MATCH SET fn.name = fn_name, fn.project_name = project_name, fn.language = language,
1391 fn.return_type = coalesce(return_type, fn.return_type),
1392 fn.param_types = coalesce(param_types, fn.param_types),
1393 fn.param_count = coalesce(param_count, fn.param_count),
1394 fn.modifiers = coalesce(modifiers, fn.modifiers),
1395 fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
1396 MERGE (f)-[:DECLARES_FUNCTION]->(fn)
1397 MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
1398 ",
1399 )
1400 .param("file_path", file_path.to_string())
1401 .param("class_fqns", class_fqns)
1402 .param("fn_fqns", fn_fqns)
1403 .param("fn_names", fn_names)
1404 .param("return_types", return_types)
1405 .param("param_types_list", param_types_list)
1406 .param("param_counts", param_counts)
1407 .param("modifiers_list", modifiers_list)
1408 .param("project_name", project_name.clone())
1409 .param("language", language.clone())
1410 .param("code_bytes_list", code_bytes_list);
1411
1412 graph.run(q).await?;
1413 }
1414
1415 let methods_top: Vec<&FunctionSymbol> = methods.iter().filter(|f| f.class_fqn.is_none()).collect();
1416 for chunk in methods_top.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
1417 let fn_fqns: Vec<String> = chunk.iter().map(|f| f.fqn.clone()).collect();
1418 let fn_names: Vec<String> = chunk.iter().map(|f| f.name.clone()).collect();
1419 let return_types: Vec<Option<String>> = chunk.iter().map(|f| f.return_type.clone()).collect();
1420 let param_types_list: Vec<Vec<String>> = chunk.iter().map(|f| f.param_types.clone()).collect();
1421 let param_counts: Vec<i64> = chunk.iter().map(|f| f.param_count as i64).collect();
1422 let modifiers_list: Vec<Vec<String>> = chunk.iter().map(|f| f.modifiers.clone()).collect();
1423 let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
1424 for f in chunk {
1425 code_bytes_list.push(
1426 code_bytes_for_span(
1427 compressor,
1428 source,
1429 method_spans.get(&f.fqn).copied(),
1430 LanguageId::CSharp,
1431 )
1432 .await,
1433 );
1434 }
1435 let q = query(
1436 "
1437 UNWIND range(0, size($fn_fqns) - 1) AS i
1438 WITH $file_path AS path, $fn_fqns[i] AS fn_fqn, $fn_names[i] AS fn_name,
1439 $return_types[i] AS return_type, $param_types_list[i] AS param_types,
1440 $param_counts[i] AS param_count, $modifiers_list[i] AS modifiers,
1441 $project_name AS project_name, $language AS language,
1442 $code_bytes_list[i] AS code_bytes
1443 MATCH (f:File { path: path })
1444 MERGE (fn:Function { fqn: fn_fqn })
1445 ON CREATE SET fn.name = fn_name, fn.path = path, fn.project_name = project_name,
1446 fn.language = language, fn.return_type = return_type,
1447 fn.param_types = param_types, fn.param_count = param_count,
1448 fn.modifiers = modifiers, fn.code_bytes = code_bytes
1449 ON MATCH SET fn.name = fn_name, fn.project_name = project_name, fn.language = language,
1450 fn.return_type = coalesce(return_type, fn.return_type),
1451 fn.param_types = coalesce(param_types, fn.param_types),
1452 fn.param_count = coalesce(param_count, fn.param_count),
1453 fn.modifiers = coalesce(modifiers, fn.modifiers),
1454 fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
1455 MERGE (f)-[:DECLARES_FUNCTION]->(fn)
1456 ",
1457 )
1458 .param("file_path", file_path.to_string())
1459 .param("fn_fqns", fn_fqns)
1460 .param("fn_names", fn_names)
1461 .param("return_types", return_types)
1462 .param("param_types_list", param_types_list)
1463 .param("param_counts", param_counts)
1464 .param("modifiers_list", modifiers_list)
1465 .param("project_name", project_name.clone())
1466 .param("language", language.clone())
1467 .param("code_bytes_list", code_bytes_list);
1468
1469 graph.run(q).await?;
1470 }
1471
1472 let endpoints = extract_csharp_api_endpoints_from_tree(&file.tree, source);
1487 for (methods_http, path_template, handler_name) in endpoints {
1488 let norm_path = normalize_api_path(&path_template);
1489
1490 let api_query = query(
1492 "
1493 MERGE (api:ApiEndpoint { path: $path })
1494 ON CREATE SET api.methods = $methods,
1495 api.protocol = 'http',
1496 api.framework = 'aspnet',
1497 api.project_name = $project_name,
1498 api.norm_path = $norm_path
1499 ON MATCH SET api.methods = $methods,
1500 api.protocol = coalesce(api.protocol, 'http'),
1501 api.framework = coalesce(api.framework, 'aspnet'),
1502 api.project_name = coalesce(api.project_name, $project_name),
1503 api.norm_path = coalesce(api.norm_path, $norm_path)
1504 ",
1505 )
1506 .param("path", path_template.clone())
1507 .param("methods", methods_http.clone())
1508 .param("project_name", project_name.clone())
1509 .param("norm_path", norm_path.clone());
1510
1511 graph.run(api_query).await?;
1512
1513 for func in &methods {
1515 if func.name != handler_name {
1516 continue;
1517 }
1518
1519 let rel_query = query(
1520 "
1521 MERGE (fn:Function { fqn: $fn_fqn })
1522 MERGE (api:ApiEndpoint { path: $path })
1523 MERGE (api)-[:HANDLED_BY]->(fn)
1524 ",
1525 )
1526 .param("fn_fqn", func.fqn.clone())
1527 .param("path", path_template.clone());
1528
1529 graph.run(rel_query).await?;
1530 }
1531 }
1532
1533 let url_spans = extract_csharp_external_http_urls_with_spans(&file.tree, source);
1535 let method_spans = csharp_method_body_spans(&file.tree, source, namespace.as_deref());
1536 let mut spans_by_fqn: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
1537 for (fqn, lo, hi) in method_spans {
1538 spans_by_fqn.entry(fqn).or_default().push((lo, hi));
1539 }
1540
1541 for (full_url, u_start, u_end) in url_spans {
1542 let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
1543 let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
1544 let base_url = format!("{protocol}://{host}");
1545 let name = host.clone();
1546 let norm_path = normalize_api_path(&path);
1547
1548 let ext_query = query(
1549 "
1550 MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
1551 ON CREATE SET ext.name = $name,
1552 ext.path = $path,
1553 ext.protocol = $protocol,
1554 ext.provider = $provider,
1555 ext.project_name = $project_name
1556 ON MATCH SET ext.name = coalesce(ext.name, $name),
1557 ext.path = coalesce(ext.path, $path),
1558 ext.protocol = coalesce(ext.protocol, $protocol),
1559 ext.provider = coalesce(ext.provider, $provider),
1560 ext.project_name = coalesce(ext.project_name, $project_name)
1561 ",
1562 )
1563 .param("name", name.clone())
1564 .param("base_url", base_url.clone())
1565 .param("path", path.clone())
1566 .param("norm_path", norm_path.clone())
1567 .param("protocol", protocol.clone())
1568 .param("provider", name.clone())
1569 .param("project_name", project_name.clone());
1570
1571 graph.run(ext_query).await?;
1572
1573 for func in &methods {
1574 let Some(ranges) = spans_by_fqn.get(&func.fqn) else {
1575 continue;
1576 };
1577 if !ranges
1578 .iter()
1579 .any(|(lo, hi)| *lo <= u_start && u_end <= *hi)
1580 {
1581 continue;
1582 }
1583 accumulator.add_calls_external_api(
1584 func.fqn.clone(),
1585 base_url.clone(),
1586 norm_path.clone(),
1587 );
1588 }
1589 }
1590
1591 for ns in &using_summary.namespace_imports {
1593 let Some(dep_paths) = csharp_index.namespace_to_paths.get(ns) else {
1594 continue;
1595 };
1596 for dep_path in dep_paths {
1597 if dep_path == file_path || !known_paths.contains(dep_path) {
1598 continue;
1599 }
1600 let dep_query = query(
1601 "
1602 MERGE (src:File { path: $src_path })
1603 MERGE (dst:File { path: $dst_path })
1604 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
1605 ",
1606 )
1607 .param("src_path", file_path.to_string())
1608 .param("dst_path", dep_path.clone());
1609
1610 graph.run(dep_query).await?;
1611 }
1612 }
1613
1614 for (derived, base) in extract_csharp_class_inheritance_edges(
1615 &file.tree,
1616 source,
1617 namespace.as_deref(),
1618 &using_summary,
1619 csharp_index,
1620 ) {
1621 accumulator.add_class_uses_class(derived, base);
1622 }
1623
1624 let used_classes = extract_csharp_used_classes(
1626 &file.tree,
1627 source,
1628 namespace.as_deref(),
1629 &using_summary,
1630 csharp_index,
1631 );
1632 for (fn_fqn, class_fqn) in used_classes {
1633 accumulator.add_uses_class(fn_fqn, class_fqn);
1634 }
1635
1636 let calls = extract_csharp_calls(
1638 &file.tree,
1639 source,
1640 namespace.as_deref(),
1641 &using_summary,
1642 csharp_index,
1643 );
1644 for (caller_fqn, callee_fqn) in calls {
1645 accumulator.add_calls_function(caller_fqn, callee_fqn);
1646 }
1647
1648 Ok(())
1649}
1650
1651async fn persist_erlang_structure(
1661 graph: &Graph,
1662 file: &ParsedFile,
1663 file_path: String,
1664 source: &str,
1665 project_name: Option<String>,
1666 erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
1667 erlang_accumulator: SharedBatchAccumulator,
1668 compressor: Option<&CompressorClient>,
1669) -> Result<(), GraphError> {
1670 let language = file.language.to_string();
1671
1672 let module_name = resolve_erlang_module_name(&file.path, &file.tree, source);
1676 let erlang_meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, source);
1677
1678 if let Some(module_name) = module_name.as_ref() {
1680 let module_code_bytes = match compressor {
1681 Some(client) => compress_full_source(source, LanguageId::Erlang, client).await,
1682 None => None,
1683 };
1684 let module_query = query(
1685 "
1686 MATCH (f:File { path: $path })
1687 MERGE (m:Module { name: $module_name, path: $path })
1688 ON CREATE SET m.language = $language,
1689 m.project_name = $project_name,
1690 m.code_bytes = $code_bytes
1691 ON MATCH SET m.language = $language,
1692 m.project_name = $project_name,
1693 m.code_bytes = coalesce($code_bytes, m.code_bytes)
1694 MERGE (f)-[:DECLARES_MODULE]->(m)
1695 ",
1696 )
1697 .param("path", file_path.to_string())
1698 .param("module_name", module_name.clone())
1699 .param("language", language.clone())
1700 .param("project_name", project_name.clone())
1701 .param(props::CODE_BYTES, module_code_bytes);
1702
1703 graph.run(module_query).await?;
1704 }
1705
1706 let functions = if let Some(module_name) = module_name.as_ref() {
1708 extract_erlang_functions(module_name, &file.tree, source)
1709 } else {
1710 Vec::new()
1711 };
1712 let function_spans = module_name.as_ref().map(|module_name| {
1713 extract_erlang_function_spans(module_name, &file.tree, source)
1714 }).unwrap_or_default();
1715 if !functions.is_empty() {
1717 let module_name = module_name.as_ref().expect("module must exist when functions exist");
1718 let mut fn_fqns: Vec<String> = Vec::with_capacity(functions.len());
1721 let mut fn_names: Vec<String> = Vec::with_capacity(functions.len());
1722 let mut fn_arities: Vec<i64> = Vec::with_capacity(functions.len());
1723 let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(functions.len());
1724
1725 for (fun_name, arity, fqn) in &functions {
1726 fn_fqns.push(fqn.clone());
1727 fn_names.push(fun_name.clone());
1728 fn_arities.push(*arity as i64);
1729 code_bytes_list.push(
1730 code_bytes_for_span(
1731 compressor,
1732 source,
1733 function_spans.get(fqn).copied(),
1734 LanguageId::Erlang,
1735 )
1736 .await,
1737 );
1738 }
1739
1740 let fun_query = query(
1741 "
1742 MATCH (f:File { path: $path })
1743 MATCH (m:Module { name: $module_name, path: $path })
1744 WITH f, m,
1745 $fn_fqns AS fn_fqns,
1746 $fn_names AS fn_names,
1747 $fn_arities AS fn_arities,
1748 $code_bytes_list AS code_bytes_list,
1749 $language AS language,
1750 $path AS path,
1751 $project_name AS project_name
1752 UNWIND range(0, size(fn_fqns) - 1) AS idx
1753 WITH f, m, language, path, project_name,
1754 fn_fqns[idx] AS fn_fqn,
1755 fn_names[idx] AS fn_name,
1756 fn_arities[idx] AS arity,
1757 code_bytes_list[idx] AS code_bytes
1758 MERGE (fn:Function { fqn: fn_fqn })
1759 ON CREATE SET fn.name = fn_name,
1760 fn.path = path,
1761 fn.language = language,
1762 fn.project_name = project_name,
1763 fn.arity = arity,
1764 fn.code_bytes = code_bytes
1765 ON MATCH SET fn.name = fn_name,
1766 fn.path = path,
1767 fn.language = language,
1768 fn.project_name = project_name,
1769 fn.arity = arity,
1770 fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
1771 MERGE (f)-[:DECLARES_FUNCTION]->(fn)
1772 MERGE (m)-[:DECLARES_FUNCTION]->(fn)
1773 ",
1774 )
1775 .param("path", file_path.to_string())
1776 .param("module_name", module_name.clone())
1777 .param("language", language.clone())
1778 .param("fn_fqns", fn_fqns)
1779 .param("fn_names", fn_names)
1780 .param("fn_arities", fn_arities)
1781 .param("code_bytes_list", code_bytes_list)
1782 .param("project_name", project_name.clone());
1783
1784 graph.run(fun_query).await?;
1785 }
1786
1787 let callback_contracts = collect_callback_contracts_for_module(
1790 module_name.as_deref(),
1791 &erlang_meta.behaviour_usages,
1792 &erlang_meta.declared_callbacks,
1793 &erlang_meta.optional_callbacks,
1794 );
1795
1796 if let Some(module_name) = module_name.as_ref() {
1797 for behaviour in &erlang_meta.behaviour_usages {
1799 let behaviour_query = query(
1800 "
1801 MATCH (m:Module { name: $module_name, path: $path })
1802 MERGE (b:Behaviour { name: $behaviour })
1803 ON CREATE SET b.language = $language,
1804 b.project_name = $project_name
1805 ON MATCH SET b.language = coalesce(b.language, $language),
1806 b.project_name = coalesce(b.project_name, $project_name)
1807 MERGE (m)-[:IMPLEMENTS_BEHAVIOUR]->(b)
1808 ",
1809 )
1810 .param("module_name", module_name.clone())
1811 .param("path", file_path.to_string())
1812 .param("behaviour", behaviour.clone())
1813 .param("language", language.clone())
1814 .param("project_name", project_name.clone());
1815 graph.run(behaviour_query).await?;
1816
1817 let dep_path = guess_erlang_file_path_from_module(&file_path, behaviour);
1821 let module_dep_query = query(
1822 "
1823 MATCH (m:Module { name: $module_name, path: $path })
1824 MERGE (dst:File { path: $dst_path })
1825 MERGE (m)-[:DEPENDS_ON_FILE]->(dst)
1826 ",
1827 )
1828 .param("module_name", module_name.clone())
1829 .param("path", file_path.to_string())
1830 .param("dst_path", dep_path);
1831 graph.run(module_dep_query).await?;
1832 }
1833
1834 if !erlang_meta.declared_callbacks.is_empty() {
1837 let file_declares_behaviour_query = query(
1838 "
1839 MATCH (f:File { path: $path })
1840 MERGE (b:Behaviour { name: $behaviour })
1841 ON CREATE SET b.path = $path,
1842 b.language = $language,
1843 b.project_name = $project_name
1844 ON MATCH SET b.path = coalesce(b.path, $path),
1845 b.language = coalesce(b.language, $language),
1846 b.project_name = coalesce(b.project_name, $project_name)
1847 MERGE (f)-[:DECLARES_BEHAVIOUR]->(b)
1848 ",
1849 )
1850 .param("path", file_path.to_string())
1851 .param("behaviour", module_name.clone())
1852 .param("language", language.clone())
1853 .param("project_name", project_name.clone());
1854 graph.run(file_declares_behaviour_query).await?;
1855 }
1856
1857 for parent_behaviour in &erlang_meta.behaviour_extensions {
1859 let extends_query = query(
1860 "
1861 MERGE (child:Behaviour { name: $child })
1862 MERGE (parent:Behaviour { name: $parent })
1863 MERGE (child)-[:EXTENDS_BEHAVIOUR]->(parent)
1864 ",
1865 )
1866 .param("child", module_name.clone())
1867 .param("parent", parent_behaviour.clone());
1868 graph.run(extends_query).await?;
1869 }
1870 }
1871
1872 for contract in &callback_contracts {
1874 let callback_fqn = format!(
1875 "{behaviour}:{name}/{arity}",
1876 behaviour = contract.behaviour,
1877 name = contract.name,
1878 arity = contract.arity
1879 );
1880 let callback_query = query(
1881 "
1882 MERGE (b:Behaviour { name: $behaviour })
1883 MERGE (cb:Callback { fqn: $cb_fqn })
1884 ON CREATE SET cb.name = $cb_name,
1885 cb.arity = $cb_arity,
1886 cb.optional = $cb_optional,
1887 cb.language = $language,
1888 cb.project_name = $project_name
1889 ON MATCH SET cb.name = coalesce(cb.name, $cb_name),
1890 cb.arity = coalesce(cb.arity, $cb_arity),
1891 cb.optional = $cb_optional,
1892 cb.language = coalesce(cb.language, $language),
1893 cb.project_name = coalesce(cb.project_name, $project_name)
1894 MERGE (b)-[:DECLARES_CALLBACK]->(cb)
1895 ",
1896 )
1897 .param("behaviour", contract.behaviour.clone())
1898 .param("cb_fqn", callback_fqn.clone())
1899 .param("cb_name", contract.name.clone())
1900 .param("cb_arity", contract.arity as i64)
1901 .param("cb_optional", contract.optional)
1902 .param("language", language.clone())
1903 .param("project_name", project_name.clone());
1904 graph.run(callback_query).await?;
1905 }
1906
1907 let function_by_sig: HashMap<(String, u32), String> = functions
1910 .iter()
1911 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
1912 .collect();
1913
1914 for contract in &callback_contracts {
1915 let key = (contract.name.clone(), contract.arity);
1916 let Some(fn_fqn) = function_by_sig.get(&key) else {
1917 continue;
1918 };
1919
1920 let callback_fqn = format!(
1921 "{behaviour}:{name}/{arity}",
1922 behaviour = contract.behaviour,
1923 name = contract.name,
1924 arity = contract.arity
1925 );
1926 let implements_query = query(
1927 "
1928 MERGE (fn:Function { fqn: $fn_fqn })
1929 MERGE (cb:Callback { fqn: $cb_fqn })
1930 MERGE (fn)-[:IMPLEMENTS_CALLBACK]->(cb)
1931 ",
1932 )
1933 .param("fn_fqn", fn_fqn.clone())
1934 .param("cb_fqn", callback_fqn.clone());
1935 graph.run(implements_query).await?;
1936 }
1937
1938 for (name, arity) in erlang_meta.overridden_callbacks {
1940 let key = (name.clone(), arity);
1941 let Some(fn_fqn) = function_by_sig.get(&key) else {
1942 continue;
1943 };
1944 for contract in callback_contracts
1945 .iter()
1946 .filter(|c| c.name == name && c.arity == arity)
1947 {
1948 let callback_fqn = format!(
1949 "{behaviour}:{name}/{arity}",
1950 behaviour = contract.behaviour,
1951 name = contract.name,
1952 arity = contract.arity
1953 );
1954 let overrides_query = query(
1955 "
1956 MERGE (fn:Function { fqn: $fn_fqn })
1957 MERGE (cb:Callback { fqn: $cb_fqn })
1958 MERGE (fn)-[:OVERRIDES_CALLBACK]->(cb)
1959 ",
1960 )
1961 .param("fn_fqn", fn_fqn.clone())
1962 .param("cb_fqn", callback_fqn);
1963 graph.run(overrides_query).await?;
1964 }
1965 }
1966
1967 let endpoints = extract_erlang_api_endpoints(&file.tree, source);
1975 for (methods, path_template, handler_module) in endpoints {
1976 let norm_path = normalize_api_path(&path_template);
1977
1978 let api_query = query(
1980 "
1981 MERGE (api:ApiEndpoint { path: $path })
1982 ON CREATE SET api.methods = $methods,
1983 api.protocol = 'http',
1984 api.framework = 'cowboy',
1985 api.project_name = $project_name,
1986 api.norm_path = $norm_path
1987 ON MATCH SET api.methods = $methods,
1988 api.protocol = coalesce(api.protocol, 'http'),
1989 api.framework = coalesce(api.framework, 'cowboy'),
1990 api.project_name = coalesce(api.project_name, $project_name),
1991 api.norm_path = coalesce(api.norm_path, $norm_path)
1992 ",
1993 )
1994 .param("path", path_template.clone())
1995 .param("methods", methods.clone())
1996 .param("project_name", project_name.clone())
1997 .param("norm_path", norm_path.clone());
1998
1999 graph.run(api_query).await?;
2000
2001 let candidate_fqns = select_endpoint_handler_fqns(&handler_module, erlang_module_index);
2005 for fqn in candidate_fqns {
2006 let rel_query = query(
2007 "
2008 MERGE (fn:Function { fqn: $fn_fqn })
2009 MERGE (api:ApiEndpoint { path: $path })
2010 MERGE (api)-[:HANDLED_BY]->(fn)
2011 ",
2012 )
2013 .param("fn_fqn", fqn)
2014 .param("path", path_template.clone());
2015
2016 graph.run(rel_query).await?;
2017 }
2018 }
2019
2020 let external_urls = extract_external_http_urls_from_tree(&file.tree, source);
2026 for full_url in external_urls {
2027 let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
2028 let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
2029 let base_url = format!("{protocol}://{host}");
2030 let name = host.clone();
2031 let norm_path = normalize_api_path(&path);
2032
2033 let ext_query = query(
2034 "
2035 MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
2036 ON CREATE SET ext.name = $name,
2037 ext.path = $path,
2038 ext.protocol = $protocol,
2039 ext.provider = $provider,
2040 ext.project_name = $project_name
2041 ON MATCH SET ext.name = coalesce(ext.name, $name),
2042 ext.path = coalesce(ext.path, $path),
2043 ext.protocol = coalesce(ext.protocol, $protocol),
2044 ext.provider = coalesce(ext.provider, $provider),
2045 ext.project_name = coalesce(ext.project_name, $project_name)
2046 ",
2047 )
2048 .param("name", name.clone())
2049 .param("base_url", base_url.clone())
2050 .param("path", path.clone())
2051 .param("norm_path", norm_path.clone())
2052 .param("protocol", protocol.clone())
2053 .param("provider", name.clone())
2054 .param("project_name", project_name.clone());
2055
2056 graph.run(ext_query).await?;
2057
2058 if !functions.is_empty() {
2062 let mut guard = erlang_accumulator.lock().await;
2063 for (_fun_name, _arity, fqn) in &functions {
2064 guard.add_calls_external_api(fqn.clone(), base_url.clone(), norm_path.clone());
2065 }
2066 }
2067 }
2068
2069 let function_by_sig: HashMap<(String, u32), String> = functions
2078 .iter()
2079 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
2080 .collect();
2081 let call_edges: Vec<(String, String)> = extract_erlang_call_edges(
2082 &file.tree,
2083 source,
2084 module_name.as_deref(),
2085 &function_by_sig,
2086 )
2087 .into_iter()
2088 .collect();
2089
2090 if !call_edges.is_empty() {
2091 let mut guard = erlang_accumulator.lock().await;
2092 for (caller_fqn, callee_fqn) in call_edges {
2093 guard.add_calls_function(caller_fqn, callee_fqn);
2094 }
2095 }
2096
2097 let called_modules = extract_erlang_called_modules_from_tree(&file.tree, source);
2103 for callee_mod in called_modules {
2104 if module_name.as_deref() == Some(callee_mod.as_str()) {
2105 continue;
2106 }
2107 let dep_path = guess_erlang_file_path_from_module(&file_path, &callee_mod);
2108 let dep_query = query(
2109 "
2110 MERGE (src:File { path: $src_path })
2111 MERGE (dst:File { path: $dst_path })
2112 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2113 ",
2114 )
2115 .param("src_path", file_path.to_string())
2116 .param("dst_path", dep_path);
2117
2118 graph.run(dep_query).await?;
2119
2120 if let Some(module_name) = module_name.as_ref() {
2121 let module_dep_query = query(
2122 "
2123 MATCH (m:Module { name: $module_name, path: $path })
2124 MERGE (dst:File { path: $dst_path })
2125 MERGE (m)-[:DEPENDS_ON_FILE]->(dst)
2126 ",
2127 )
2128 .param("module_name", module_name.clone())
2129 .param("path", file_path.to_string())
2130 .param(
2131 "dst_path",
2132 guess_erlang_file_path_from_module(&file_path, &callee_mod),
2133 );
2134
2135 graph.run(module_dep_query).await?;
2136 }
2137 }
2138
2139 Ok(())
2140}
2141
2142async fn persist_non_java_functions(
2146 graph: &Graph,
2147 file_path: &str,
2148 file: &ParsedFile,
2149 source: &str,
2150 project_name: Option<String>,
2151 known_paths: &HashSet<String>,
2152 accumulator: &mut BatchAccumulator,
2153 persistence: &GraphPersistenceOptions,
2154 compressor: Option<&CompressorClient>,
2155) -> Result<(), GraphError> {
2156 let language = file.language.to_string();
2157
2158 match file.language {
2159 LanguageId::Python => {
2160 emit_limited_parse_warnings(
2161 "Python",
2162 file_path,
2163 extract_python_parse_warnings(&file.tree, source),
2164 persistence.max_parse_warnings_per_file,
2165 );
2166 }
2167 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
2168 emit_limited_parse_warnings(
2169 "JS/TS",
2170 file_path,
2171 extract_js_ts_parse_warnings(&file.tree, source),
2172 persistence.max_parse_warnings_per_file,
2173 );
2174 }
2175 _ => {}
2176 }
2177
2178 let functions = extract_non_java_function_symbols(file, source, file_path);
2179 let function_spans = extract_non_java_function_body_spans(file, source, file_path);
2180
2181 let mut name_to_fqn_depth: HashMap<String, (String, usize)> = HashMap::new();
2182 for f in &functions {
2183 let logical = f
2184 .fqn
2185 .split_once("::")
2186 .map(|(_, l)| l)
2187 .unwrap_or(f.fqn.as_str());
2188 let (short, depth) = non_java_short_name_and_depth(file.language, logical);
2189 name_to_fqn_depth
2190 .entry(short)
2191 .and_modify(|(existing_fqn, existing_depth)| {
2192 if depth > *existing_depth {
2193 *existing_fqn = f.fqn.clone();
2194 *existing_depth = depth;
2195 }
2196 })
2197 .or_insert_with(|| (f.fqn.clone(), depth));
2198 }
2199 let name_to_fqn: HashMap<String, String> = name_to_fqn_depth
2200 .into_iter()
2201 .map(|(k, (v, _))| (k, v))
2202 .collect();
2203
2204 for func in &functions {
2205 let code_bytes = code_bytes_for_span(
2206 compressor,
2207 source,
2208 function_spans.get(&func.fqn).copied(),
2209 file.language,
2210 )
2211 .await;
2212 let q = query(
2213 "
2214 MATCH (f:File { path: $path })
2215 MERGE (fn:Function { fqn: $fn_fqn })
2216 ON CREATE SET fn.name = $fn_name,
2217 fn.path = $path,
2218 fn.project_name = $project_name,
2219 fn.language = $language,
2220 fn.code_bytes = $code_bytes
2221 ON MATCH SET fn.name = $fn_name,
2222 fn.path = $path,
2223 fn.project_name = $project_name,
2224 fn.language = $language,
2225 fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
2226 MERGE (f)-[:DECLARES_FUNCTION]->(fn)
2227 ",
2228 )
2229 .param("path", file_path.to_string())
2230 .param("fn_fqn", func.fqn.clone())
2231 .param("fn_name", func.name.clone())
2232 .param("project_name", project_name.clone())
2233 .param("language", language.clone())
2234 .param(props::CODE_BYTES, code_bytes);
2235
2236 graph.run(q).await?;
2237 }
2238
2239 match file.language {
2240 LanguageId::Python => {
2241 for imp in extract_python_import_modules(&file.tree, source) {
2242 if let Some(dep) = resolve_python_import_to_known_file(&imp, known_paths) {
2243 let dep_query = query(
2244 "
2245 MERGE (src:File { path: $src_path })
2246 MERGE (dst:File { path: $dst_path })
2247 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2248 ",
2249 )
2250 .param("src_path", file_path.to_string())
2251 .param("dst_path", dep.clone());
2252 graph.run(dep_query).await?;
2253 } else if should_log_unresolved_import(
2254 persistence.verbose_imports,
2255 is_python_stdlib_top_level(&imp),
2256 is_python_common_external_top_level(&imp),
2257 ) {
2258 println!(
2259 "Python import (unresolved to scanned files): `{}` in {}",
2260 imp, file_path
2261 );
2262 }
2263 }
2264 for (caller, callee) in extract_python_intrafile_calls(
2265 &file.tree,
2266 source,
2267 file_path,
2268 &name_to_fqn,
2269 ) {
2270 accumulator.add_calls_function(caller, callee);
2271 }
2272 }
2273 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
2274 for spec in extract_js_ts_import_specifiers(&file.tree, source) {
2275 if let Some(dep) =
2276 resolve_js_ts_import_to_known_file(&spec, file_path, known_paths)
2277 {
2278 let dep_query = query(
2279 "
2280 MERGE (src:File { path: $src_path })
2281 MERGE (dst:File { path: $dst_path })
2282 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2283 ",
2284 )
2285 .param("src_path", file_path.to_string())
2286 .param("dst_path", dep.clone());
2287 graph.run(dep_query).await?;
2288 } else if persistence.verbose_imports {
2289 println!(
2290 "JS/TS import (unresolved to scanned files): `{}` in {}",
2291 spec, file_path
2292 );
2293 }
2294 }
2295 for (caller, callee) in extract_js_ts_intrafile_calls(
2296 &file.tree,
2297 source,
2298 file_path,
2299 file.language,
2300 &name_to_fqn,
2301 ) {
2302 accumulator.add_calls_function(caller, callee);
2303 }
2304 }
2305 LanguageId::Rust => {
2306 for use_path in extract_rust_use_paths(&file.tree, source) {
2307 if let Some(dep) =
2308 resolve_rust_use_to_known_file(&use_path, file_path, known_paths)
2309 {
2310 let dep_query = query(
2311 "
2312 MERGE (src:File { path: $src_path })
2313 MERGE (dst:File { path: $dst_path })
2314 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2315 ",
2316 )
2317 .param("src_path", file_path.to_string())
2318 .param("dst_path", dep.clone());
2319 graph.run(dep_query).await?;
2320 } else if persistence.verbose_imports {
2321 println!(
2322 "Rust use (unresolved to scanned files): `{}` in {}",
2323 use_path.join("::"),
2324 file_path
2325 );
2326 }
2327 }
2328 for (caller, callee) in
2329 extract_rust_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
2330 {
2331 accumulator.add_calls_function(caller, callee);
2332 }
2333 }
2334 _ => {}
2335 }
2336
2337 Ok(())
2338}
2339
/// Collects the targets of `import` statements that point into the
/// `com.redbus.genai.` package tree.
///
/// The scan is purely line based: a line only counts if, once trimmed, it
/// starts with `import ` and ends with `;`. A leading `static ` modifier is
/// dropped, so static imports yield the imported member path. Results keep
/// source order and may contain duplicates.
fn extract_internal_java_imports(source: &str) -> Vec<String> {
    const INTERNAL_PREFIX: &str = "com.redbus.genai.";

    let mut imports = Vec::new();
    for raw_line in source.lines() {
        let Some(target) = raw_line
            .trim()
            .strip_prefix("import ")
            .and_then(|rest| rest.strip_suffix(';'))
        else {
            continue;
        };

        let target = target.trim();
        let target = match target.strip_prefix("static ") {
            Some(member) => member.trim(),
            None => target,
        };

        if target.starts_with(INTERNAL_PREFIX) {
            imports.push(target.to_string());
        }
    }
    imports
}
2373
/// Maps an imported fully-qualified name to the `.java` path it would have
/// under the same source root as `current_path`.
///
/// The source root is everything in `current_path` before the first
/// occurrence of `com/redbus/genai/`; `None` is returned when that marker is
/// absent. Dots in `import_fqn` become `/` and `.java` is appended.
fn map_import_to_project_path(current_path: &str, import_fqn: &str) -> Option<String> {
    const MARKER: &str = "com/redbus/genai/";

    let root_len = current_path.find(MARKER)?;
    // `find` returns the byte offset of an ASCII match, so this slice is on a
    // char boundary.
    let source_root = &current_path[..root_len];

    let relative = import_fqn.replace('.', "/");
    Some(format!("{source_root}{relative}.java"))
}
2390
/// Returns the name declared by the first usable `package ...;` line.
///
/// A line qualifies when, trimmed, it starts with `package ` and ends with
/// `;`. Declarations whose name is empty after trimming are skipped and the
/// search continues with later lines.
fn extract_java_package(source: &str) -> Option<String> {
    source.lines().find_map(|raw_line| {
        let name = raw_line
            .trim()
            .strip_prefix("package ")?
            .strip_suffix(';')?
            .trim();
        (!name.is_empty()).then(|| name.to_string())
    })
}
2408
/// Text-based lookup of the module name from a `-module(Name).` line.
///
/// A line qualifies when, trimmed, it starts with `-module(` and ends with
/// `).`. Surrounding single quotes of a quoted atom are stripped so the
/// result agrees with `extract_erlang_module_name_from_tree`, which trims
/// them as well. Lines whose name ends up empty are skipped.
fn extract_erlang_module_name(source: &str) -> Option<String> {
    source.lines().find_map(|raw_line| {
        let inner = raw_line
            .trim()
            .strip_prefix("-module(")?
            .strip_suffix(").")?;
        // Same normalisation as the tree-based variant: trim, then drop quotes.
        let name = inner.trim().trim_matches('\'');
        (!name.is_empty()).then(|| name.to_string())
    })
}
2426
2427fn extract_erlang_module_name_from_tree(tree: &Tree, source: &str) -> Option<String> {
2429 let mut out: Option<String> = None;
2430 let root = tree.root_node();
2431
2432 walk_tree(root, |node| {
2433 if out.is_some() || node.kind() != "module_attribute" {
2434 return;
2435 }
2436 if let Some(name_node) = node.child_by_field_name("name") {
2437 let start = name_node.start_byte() as usize;
2438 let end = name_node.end_byte() as usize;
2439 if end <= source.len() && start < end {
2440 let name = source[start..end].trim().trim_matches('\'').to_string();
2441 if !name.is_empty() {
2442 out = Some(name);
2443 }
2444 }
2445 }
2446 });
2447
2448 out
2449}
2450
/// Reports whether `path` has the extension `hrl`, compared ASCII
/// case-insensitively. Paths without a (UTF-8) extension are not headers.
fn is_erlang_header_file(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("hrl"),
        None => false,
    }
}
2457
2458fn resolve_erlang_module_name(path: &Path, tree: &Tree, source: &str) -> Option<String> {
2463 if let Some(module_name) = extract_erlang_module_name_from_tree(tree, source)
2464 .or_else(|| extract_erlang_module_name(source))
2465 {
2466 return Some(module_name);
2467 }
2468
2469 if is_erlang_header_file(path) {
2470 None
2471 } else {
2472 Some(guess_erlang_module_name_from_path(&path.display().to_string()))
2473 }
2474}
2475
/// Derives a module name from a file path by taking its file stem.
///
/// Falls back to `"unknown_module"` when the path has no file stem or the
/// stem is not valid UTF-8.
fn guess_erlang_module_name_from_path(path: &str) -> String {
    let stem = Path::new(path).file_stem().and_then(|stem| stem.to_str());
    match stem {
        Some(stem) => stem.to_string(),
        None => String::from("unknown_module"),
    }
}
2485
2486fn extract_erlang_behaviour_metadata_from_tree(tree: &Tree, source: &str) -> ErlangBehaviourMetadata {
2487 let mut meta = ErlangBehaviourMetadata::default();
2488 let mut seen_declared_callbacks: HashSet<(String, u32)> = HashSet::new();
2489 let root = tree.root_node();
2490
2491 walk_tree(root, |node| match node.kind() {
2492 "behaviour_attribute" => {
2493 if let Some(name_node) = node.child_by_field_name("name") {
2494 if let Some(name) = extract_erlang_name(name_node, source) {
2495 meta.behaviour_usages.insert(name);
2496 }
2497 }
2498 }
2499 "callback" => {
2500 let Some(fun_node) = node.child_by_field_name("fun") else {
2501 return;
2502 };
2503 let Some(fun_name) = extract_erlang_name(fun_node, source) else {
2504 return;
2505 };
2506
2507 let mut cursor = node.walk();
2508 for sig_node in node.children_by_field_name("sigs", &mut cursor) {
2509 let Some(args_node) = sig_node.child_by_field_name("args") else {
2510 continue;
2511 };
2512 let key = (fun_name.clone(), args_node.named_child_count() as u32);
2513 if seen_declared_callbacks.insert(key.clone()) {
2514 meta.declared_callbacks.push(key);
2515 }
2516 }
2517 }
2518 "optional_callbacks_attribute" => {
2519 let mut cursor = node.walk();
2520 for fa_node in node.children_by_field_name("callbacks", &mut cursor) {
2521 if let Some((name, arity)) = extract_erlang_fa_pair(fa_node, source) {
2522 meta.optional_callbacks.insert((name, arity));
2523 }
2524 }
2525 }
2526 "wild_attribute" => {
2527 let Some(attr_node) = node.child_by_field_name("name") else {
2528 return;
2529 };
2530 let Some(attr_name) = extract_wild_attribute_name(attr_node, source) else {
2531 return;
2532 };
2533 let Some(value_node) = node.child_by_field_name("value") else {
2534 return;
2535 };
2536
2537 match attr_name.as_str() {
2538 "extends_behaviour" => {
2539 if let Some(parent_behaviour) = extract_single_name_expr(value_node, source) {
2540 meta.behaviour_extensions.insert(parent_behaviour);
2541 }
2542 }
2543 "override_callback" => {
2544 let mut pairs: HashSet<(String, u32)> = HashSet::new();
2545 walk_tree(value_node, |child| {
2546 if child.kind() == "fa" {
2547 if let Some((name, arity)) = extract_erlang_fa_pair(child, source) {
2548 pairs.insert((name, arity));
2549 }
2550 }
2551 });
2552 if pairs.is_empty() {
2553 if let Some(raw_value) = node_text(value_node, source) {
2554 pairs.extend(parse_fa_pairs_from_expr_text(raw_value));
2555 }
2556 }
2557 meta.overridden_callbacks.extend(pairs);
2558 }
2559 _ => {}
2560 }
2561 }
2562 _ => {}
2563 });
2564
2565 meta
2566}
2567
2568fn extract_wild_attribute_name(attr_node: Node, source: &str) -> Option<String> {
2569 let name_node = attr_node.child_by_field_name("name").unwrap_or(attr_node);
2570 let mut name = extract_erlang_name(name_node, source)?;
2571 if let Some(stripped) = name.strip_prefix('-') {
2572 name = stripped.to_string();
2573 }
2574 Some(name)
2575}
2576
2577fn extract_erlang_fa_pair(fa_node: Node, source: &str) -> Option<(String, u32)> {
2578 if fa_node.kind() != "fa" {
2579 return None;
2580 }
2581 let fun_node = fa_node.child_by_field_name("fun")?;
2582 let arity_node = fa_node.child_by_field_name("arity")?;
2583
2584 let fun_name = extract_erlang_name(fun_node, source)?;
2585 let raw_arity = node_text(arity_node, source)?;
2586 let arity = parse_erlang_arity(raw_arity)?;
2587 Some((fun_name, arity))
2588}
2589
/// Parses an arity such as `2`, `/2` or `/ 2` into a `u32`.
///
/// Surrounding whitespace and any leading `/` characters are ignored.
/// Whitespace between the slash and the digits is skipped too, so `/ 2`
/// parses instead of being rejected. Returns `None` for anything that is not
/// a valid `u32`.
fn parse_erlang_arity(raw: &str) -> Option<u32> {
    raw.trim()
        .trim_start_matches('/')
        .trim_start()
        .parse::<u32>()
        .ok()
}
2593
2594fn extract_single_name_expr(node: Node, source: &str) -> Option<String> {
2595 let mut raw = node_text(node, source)?.trim();
2596 while raw.starts_with('(') && raw.ends_with(')') && raw.len() >= 2 {
2597 raw = raw[1..raw.len() - 1].trim();
2598 }
2599 normalize_erlang_name(raw)
2600}
2601
2602fn parse_fa_pairs_from_expr_text(expr_text: &str) -> HashSet<(String, u32)> {
2603 let mut out = HashSet::new();
2604 let mut raw = expr_text.trim();
2605 while raw.starts_with('(') && raw.ends_with(')') && raw.len() >= 2 {
2606 raw = raw[1..raw.len() - 1].trim();
2607 }
2608
2609 for token in raw.split(',') {
2610 let token = token.trim();
2611 let Some((name_raw, arity_raw)) = token.split_once('/') else {
2612 continue;
2613 };
2614 let Some(name) = normalize_erlang_name(name_raw) else {
2615 continue;
2616 };
2617 let Some(arity) = parse_erlang_arity(arity_raw) else {
2618 continue;
2619 };
2620 out.insert((name, arity));
2621 }
2622
2623 out
2624}
2625
2626fn extract_erlang_name(node: Node, source: &str) -> Option<String> {
2627 let raw = node_text(node, source)?;
2628 normalize_erlang_name(raw)
2629}
2630
2631fn node_text<'a>(node: Node, source: &'a str) -> Option<&'a str> {
2632 let start = node.start_byte() as usize;
2633 let end = node.end_byte() as usize;
2634 if end <= source.len() && start < end {
2635 Some(&source[start..end])
2636 } else {
2637 None
2638 }
2639}
2640
/// Normalises raw text into an Erlang name.
///
/// The text is trimmed and one pair of surrounding single quotes is removed.
/// The result is rejected (`None`) when it is empty, starts with `?`, starts
/// with an ASCII uppercase letter, or contains any whitespace.
fn normalize_erlang_name(raw: &str) -> Option<String> {
    let trimmed = raw.trim();
    // A lone `'` has no matching closing quote and is left untouched.
    let name = trimmed
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''))
        .unwrap_or(trimmed);

    // `?` bails out on an empty name.
    let first = name.chars().next()?;
    if first == '?' || first.is_ascii_uppercase() {
        return None;
    }
    if name.contains(char::is_whitespace) {
        return None;
    }
    Some(name.to_string())
}
2665
/// Built-in table of `(callback name, arity)` pairs for a handful of
/// behaviours, looked up by behaviour name. Unknown names give an empty slice.
fn known_behaviour_callbacks(behaviour: &str) -> &'static [(&'static str, u32)] {
    const GEN_SERVER: &[(&str, u32)] = &[
        ("init", 1),
        ("handle_call", 3),
        ("handle_cast", 2),
        ("handle_info", 2),
        ("terminate", 2),
        ("code_change", 3),
        ("format_status", 2),
    ];
    const SUPERVISOR: &[(&str, u32)] = &[("init", 1)];
    const GEN_STATEM: &[(&str, u32)] = &[
        ("init", 1),
        ("callback_mode", 0),
        ("state_name", 3),
        ("state_name", 2),
        ("terminate", 3),
        ("code_change", 4),
        ("format_status", 2),
    ];
    const COWBOY_HANDLER: &[(&str, u32)] = &[("init", 2)];
    const COWBOY_LOOP: &[(&str, u32)] = &[("init", 2), ("info", 3), ("terminate", 3)];
    const COWBOY_WEBSOCKET: &[(&str, u32)] = &[
        ("init", 2),
        ("websocket_init", 1),
        ("websocket_handle", 2),
        ("websocket_info", 2),
        ("terminate", 3),
    ];
    const COWBOY_REST: &[(&str, u32)] = &[
        ("init", 2),
        ("allowed_methods", 2),
        ("content_types_provided", 2),
        ("content_types_accepted", 2),
        ("resource_exists", 2),
        ("is_authorized", 2),
        ("forbidden", 2),
        ("malformed_request", 2),
        ("delete_resource", 2),
        ("generate_etag", 2),
        ("last_modified", 2),
    ];

    match behaviour {
        "gen_server" => GEN_SERVER,
        "supervisor" => SUPERVISOR,
        "gen_statem" => GEN_STATEM,
        "cowboy_handler" => COWBOY_HANDLER,
        "cowboy_loop" => COWBOY_LOOP,
        "cowboy_websocket" => COWBOY_WEBSOCKET,
        "cowboy_rest" => COWBOY_REST,
        _ => &[],
    }
}
2712
2713fn collect_callback_contracts_for_module(
2714 module_name: Option<&str>,
2715 behaviour_usages: &HashSet<String>,
2716 declared_callbacks: &[(String, u32)],
2717 optional_callbacks: &HashSet<(String, u32)>,
2718) -> Vec<CallbackContract> {
2719 let mut contracts = HashSet::new();
2720
2721 if let Some(module_name) = module_name {
2723 for (name, arity) in declared_callbacks {
2724 contracts.insert(CallbackContract {
2725 behaviour: module_name.to_string(),
2726 name: name.clone(),
2727 arity: *arity,
2728 optional: optional_callbacks.contains(&(name.clone(), *arity)),
2729 });
2730 }
2731 }
2732
2733 for behaviour in behaviour_usages {
2735 for (name, arity) in known_behaviour_callbacks(behaviour) {
2736 contracts.insert(CallbackContract {
2737 behaviour: behaviour.clone(),
2738 name: (*name).to_string(),
2739 arity: *arity,
2740 optional: false,
2741 });
2742 }
2743 }
2744
2745 contracts.into_iter().collect()
2746}
2747
2748fn select_endpoint_handler_fqns(
2749 handler_module: &str,
2750 erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
2751) -> Vec<String> {
2752 let Some(snapshot) = erlang_module_index.get(handler_module) else {
2753 return Vec::new();
2754 };
2755
2756 let mut fqns: Vec<String> = snapshot
2757 .callback_signatures
2758 .iter()
2759 .filter(|sig| snapshot.implemented_signatures.contains(*sig))
2760 .map(|(name, arity)| format!("{module}:{name}/{arity}", module = handler_module))
2761 .collect();
2762
2763 fqns.sort();
2764 fqns.dedup();
2765 fqns
2766}
2767
2768fn extract_erlang_functions(
2773 module_name: &str,
2774 tree: &Tree,
2775 source: &str,
2776) -> Vec<(String, u32, String)> {
2777 let mut seen: HashSet<(String, u32)> = HashSet::new();
2778 let mut out: Vec<(String, u32, String)> = Vec::new();
2779
2780 let root = tree.root_node();
2781 walk_tree(root, |node| {
2782 if node.kind() != "function_clause" || !is_top_level_erlang_function_clause(node) {
2783 return;
2784 }
2785
2786 let Some(name_node) = node.child_by_field_name("name") else {
2787 return;
2788 };
2789 let Some(fun_name) = extract_erlang_name(name_node, source) else {
2790 return;
2791 };
2792 let Some(args_node) = node.child_by_field_name("args") else {
2793 return;
2794 };
2795 let arity = args_node.named_child_count() as u32;
2796
2797 if seen.insert((fun_name.clone(), arity)) {
2798 let fqn = format!("{module}:{name}/{arity}", module = module_name, name = fun_name);
2799 out.push((fun_name, arity, fqn));
2800 }
2801 });
2802
2803 out
2804}
2805
2806fn is_top_level_erlang_function_clause(node: Node) -> bool {
2807 let mut parent = node.parent();
2808 let mut has_source_file_ancestor = false;
2809 while let Some(p) = parent {
2810 match p.kind() {
2811 "fun_expr" => return false,
2814 "source_file" => {
2815 has_source_file_ancestor = true;
2816 break;
2817 }
2818 _ => {
2819 parent = p.parent();
2820 }
2821 }
2822 }
2823 has_source_file_ancestor
2824}
2825
2826fn extract_erlang_call_edges(
2827 tree: &Tree,
2828 source: &str,
2829 module_name: Option<&str>,
2830 function_by_sig: &HashMap<(String, u32), String>,
2831) -> HashSet<(String, String)> {
2832 let Some(module_name) = module_name else {
2833 return HashSet::new();
2834 };
2835 let mut edges = HashSet::new();
2836 let root = tree.root_node();
2837
2838 walk_tree(root, |node| {
2839 if node.kind() != "call" {
2840 return;
2841 }
2842 let Some(caller_clause) = find_enclosing_function_clause(node) else {
2843 return;
2844 };
2845 let Some(caller_name_node) = caller_clause.child_by_field_name("name") else {
2846 return;
2847 };
2848 let Some(caller_name) = extract_erlang_name(caller_name_node, source) else {
2849 return;
2850 };
2851 let Some(caller_args) = caller_clause.child_by_field_name("args") else {
2852 return;
2853 };
2854 let caller_arity = caller_args.named_child_count() as u32;
2855 let Some(caller_fqn) = function_by_sig.get(&(caller_name, caller_arity)) else {
2856 return;
2857 };
2858
2859 let Some(call_expr) = node.child_by_field_name("expr") else {
2860 return;
2861 };
2862 let Some(call_args) = node.child_by_field_name("args") else {
2863 return;
2864 };
2865 let callee_arity = call_args.named_child_count() as u32;
2866 let Some(callee_name) = extract_local_call_name(call_expr, source) else {
2867 return;
2868 };
2869
2870 let callee_key = (callee_name, callee_arity);
2871 let Some(callee_fqn) = function_by_sig.get(&callee_key) else {
2872 return;
2873 };
2874 if caller_fqn != callee_fqn && caller_fqn.starts_with(module_name) && callee_fqn.starts_with(module_name) {
2875 edges.insert((caller_fqn.clone(), callee_fqn.clone()));
2876 }
2877 });
2878
2879 edges
2880}
2881
2882fn find_enclosing_function_clause(node: Node) -> Option<Node> {
2883 let mut cur = node.parent();
2884 while let Some(parent) = cur {
2885 if parent.kind() == "function_clause" {
2886 return Some(parent);
2887 }
2888 if parent.kind() == "source_file" {
2889 break;
2890 }
2891 cur = parent.parent();
2892 }
2893 None
2894}
2895
2896fn extract_local_call_name(expr_node: Node, source: &str) -> Option<String> {
2897 match expr_node.kind() {
2898 "_name" | "atom" | "var" => extract_erlang_name(expr_node, source),
2900 "remote" => None,
2902 _ => extract_erlang_name(expr_node, source),
2903 }
2904}
2905
2906fn extract_erlang_api_endpoints(
2912 tree: &Tree,
2913 source: &str,
2914) -> Vec<(Vec<String>, String, String)> {
2915 let mut endpoints = Vec::new();
2916 let mut seen: HashSet<(String, String)> = HashSet::new();
2917 let root = tree.root_node();
2918 walk_tree(root, |node| {
2919 if node.kind() != "tuple" {
2920 return;
2921 }
2922 let mut cursor = node.walk();
2923 let exprs: Vec<Node> = node.children_by_field_name("expr", &mut cursor).collect();
2924 if exprs.len() < 2 {
2925 return;
2926 }
2927 let Some(path_raw) = extract_erlang_string_literal(exprs[0], source) else {
2928 return;
2929 };
2930 let path = normalize_api_path(&path_raw);
2931 if !path.starts_with('/') {
2932 return;
2933 }
2934 let Some(handler_name) = extract_erlang_name(exprs[1], source) else {
2935 return;
2936 };
2937 let key = (path.clone(), handler_name.clone());
2938 if seen.insert(key) {
2939 endpoints.push((vec!["ANY".to_string()], path, handler_name));
2940 }
2941 });
2942
2943 endpoints
2944}
2945
/// Collects the distinct `http://` / `https://` URLs found in `source`.
///
/// Each line is scanned left to right. A URL runs from its scheme up to the
/// next `"`, `'`, `)` or whitespace character, or to the end of the line.
/// The order of the returned URLs is unspecified.
fn extract_external_http_urls(source: &str) -> Vec<String> {
    let mut urls: HashSet<String> = HashSet::new();

    for line in source.lines() {
        let mut rest = line;
        loop {
            // Take whichever scheme occurs first. Preferring `http://`
            // unconditionally would jump over an `https://` URL that precedes
            // an `http://` one on the same line.
            let start = match (rest.find("http://"), rest.find("https://")) {
                (Some(plain), Some(secure)) => plain.min(secure),
                (Some(only), None) | (None, Some(only)) => only,
                (None, None) => break,
            };

            let candidate = &rest[start..];
            let end = candidate
                .find(|c: char| matches!(c, '"' | '\'' | ')') || c.is_whitespace())
                .unwrap_or(candidate.len());

            // The scheme contains no terminator, so `end` is never 0 and the
            // scan always advances.
            urls.insert(candidate[..end].to_string());
            rest = &candidate[end..];
        }
    }

    urls.into_iter().collect()
}
2974
2975fn extract_external_http_urls_from_tree(tree: &Tree, source: &str) -> Vec<String> {
2977 let mut urls = HashSet::new();
2978 let root = tree.root_node();
2979 walk_tree(root, |node| {
2980 let Some(s) = extract_erlang_string_literal(node, source) else {
2981 return;
2982 };
2983 if s.starts_with("http://") || s.starts_with("https://") {
2984 urls.insert(s);
2985 }
2986 });
2987
2988 urls.into_iter().collect()
2989}
2990
/// Finds `http://` / `https://` URLs inside `source[span_start..span_end]`.
///
/// Returns `(url, start, end)` triples in order of appearance, with byte
/// offsets relative to the whole of `source`. A URL ends at the next `"`,
/// `'`, `)` or whitespace character, or at the end of the span. An empty
/// span, one reaching past `source`, or one not on char boundaries gives no
/// results.
fn scan_http_urls_in_span(
    source: &str,
    span_start: usize,
    span_end: usize,
) -> Vec<(String, usize, usize)> {
    let mut found = Vec::new();
    if span_start >= span_end || span_end > source.len() {
        return found;
    }

    let mut cursor = span_start;
    while cursor < span_end {
        let Some(window) = source.get(cursor..span_end) else {
            break;
        };

        // Take whichever scheme occurs first. Preferring `http://`
        // unconditionally would skip an `https://` URL located before an
        // `http://` one.
        let rel = match (window.find("http://"), window.find("https://")) {
            (Some(plain), Some(secure)) => plain.min(secure),
            (Some(only), None) | (None, Some(only)) => only,
            (None, None) => break,
        };

        let url_start = cursor + rel;
        let tail = &window[rel..];
        let url_len = tail
            .find(|c: char| matches!(c, '"' | '\'' | ')') || c.is_whitespace())
            .unwrap_or(tail.len());

        // `tail` starts with the scheme, which holds no terminator character,
        // so `url_len` is at least 7 and the cursor always moves forward.
        let url_end = url_start + url_len;
        found.push((tail[..url_len].to_string(), url_start, url_end));
        cursor = url_end;
    }
    found
}
3022
3023fn extract_csharp_external_http_urls_with_spans(tree: &Tree, source: &str) -> Vec<(String, usize, usize)> {
3025 let mut seen: HashSet<(String, usize, usize)> = HashSet::new();
3026 let root = tree.root_node();
3027 walk_tree(root, |node| {
3028 let kind = node.kind();
3029 if !matches!(
3030 kind,
3031 "string_literal"
3032 | "verbatim_string_literal"
3033 | "interpolated_string_text"
3034 | "interpolated_verbatim_string_text"
3035 ) {
3036 return;
3037 }
3038 let start = node.start_byte() as usize;
3039 let end = node.end_byte() as usize;
3040 for triple in scan_http_urls_in_span(source, start, end) {
3041 seen.insert(triple);
3042 }
3043 });
3044 seen.into_iter().collect()
3045}
3046
3047fn csharp_method_body_spans(tree: &Tree, source: &str, namespace: Option<&str>) -> Vec<(String, usize, usize)> {
3050 let mut out = Vec::new();
3051 let root = tree.root_node();
3052 walk_tree(root, |node| {
3053 match node.kind() {
3054 "method_declaration" => {
3055 let Some(body) = node.child_by_field_name("body") else {
3056 return;
3057 };
3058 let Some(method_name) = csharp_method_simple_name(node, source) else {
3059 return;
3060 };
3061 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
3062 let effective_ns = csharp_enclosing_namespace_prefix(node, source)
3063 .or_else(|| namespace.map(|s| s.to_string()));
3064 let fqn = match class_fqn {
3065 Some(cf) => format!("{cf}.{method_name}"),
3066 None => effective_ns
3067 .as_ref()
3068 .map(|ns| format!("{ns}.{method_name}"))
3069 .unwrap_or(method_name),
3070 };
3071 let lo = body.start_byte() as usize;
3072 let hi = body.end_byte() as usize;
3073 out.push((fqn, lo, hi));
3074 }
3075 "constructor_declaration" => {
3076 let Some(cf) = csharp_enclosing_type_fqn(node, source, namespace) else {
3077 return;
3078 };
3079 let (_, fqn) = csharp_constructor_symbol_fqn(node, &cf, source);
3080 let Some(body) = csharp_block_body(node) else {
3081 return;
3082 };
3083 let lo = body.start_byte() as usize;
3084 let hi = body.end_byte() as usize;
3085 out.push((fqn, lo, hi));
3086 }
3087 "accessor_declaration" => {
3088 let Some(cf) = csharp_enclosing_type_fqn(node, source, namespace) else {
3089 return;
3090 };
3091 let Some(prop_name) = csharp_property_name_for_accessor(node, source) else {
3092 return;
3093 };
3094 let prefix = csharp_accessor_kind_prefix(node, source);
3095 let fqn = format!("{cf}.{}_{}", prefix, prop_name);
3096 let Some(body) = csharp_block_body(node) else {
3097 return;
3098 };
3099 let lo = body.start_byte() as usize;
3100 let hi = body.end_byte() as usize;
3101 out.push((fqn, lo, hi));
3102 }
3103 _ => {}
3104 }
3105 });
3106 out
3107}
3108
3109fn extract_erlang_called_modules_from_tree(tree: &Tree, source: &str) -> HashSet<String> {
3111 let mut modules = HashSet::new();
3112 let root = tree.root_node();
3113 walk_tree(root, |node| {
3114 if node.kind() != "remote" {
3115 return;
3116 }
3117 let Some(remote_module) = node.child_by_field_name("module") else {
3118 return;
3119 };
3120 let Some(module_expr) = remote_module.child_by_field_name("module") else {
3121 return;
3122 };
3123 if let Some(name) = extract_erlang_name(module_expr, source) {
3124 modules.insert(name);
3125 }
3126 });
3127
3128 modules
3129}
3130
/// Splits a URL into `(scheme, host, path)`.
///
/// * With a `://` separator: the scheme is the text before it, the host runs
///   up to the first `/`, `?` or `#`, and the path is everything from that
///   character on (`"/"` when nothing follows the host). `#` is treated as a
///   terminator so a fragment never becomes part of the host.
/// * Without `://`: the scheme is `None`, the whole input is returned as the
///   host and the path is `"/"`.
fn split_url_protocol_host_and_path(url: &str) -> (Option<String>, String, String) {
    let Some((scheme, rest)) = url.split_once("://") else {
        return (None, url.to_string(), "/".to_string());
    };

    let host_end = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());
    let (host, tail) = rest.split_at(host_end);
    let path = if tail.is_empty() { "/" } else { tail };

    (
        Some(scheme.to_string()),
        host.to_string(),
        path.to_string(),
    )
}
3164
/// Normalises a path or URL into a canonical API path.
///
/// Steps, in order:
/// 1. trim whitespace;
/// 2. cut off the query string / fragment (first `?` or `#`);
/// 3. if the text starts with `scheme://authority`, drop that part (a URL
///    with nothing after the authority becomes `/`);
/// 4. make sure the path starts with `/`;
/// 5. remove one trailing `/` unless the path is just `/`.
///
/// The query is removed before the scheme check, and `://` only counts as a
/// scheme separator when no `/` precedes it. Otherwise an input such as
/// `/redirect?next=https://x/cb` would be reduced to `/cb`.
fn normalize_api_path(raw: &str) -> String {
    let mut path = raw.trim();

    if let Some(cut) = path.find(|c: char| c == '?' || c == '#') {
        path = &path[..cut];
    }

    if let Some(idx) = path.find("://") {
        if !path[..idx].contains('/') {
            let after_scheme = &path[idx + 3..];
            match after_scheme.find('/') {
                Some(slash) => path = &after_scheme[slash..],
                None => return "/".to_string(),
            }
        }
    }

    let mut normalized = String::with_capacity(path.len() + 1);
    if !path.starts_with('/') {
        normalized.push('/');
    }
    normalized.push_str(path);

    if normalized.len() > 1 && normalized.ends_with('/') {
        normalized.pop();
    }

    normalized
}
3205
3206fn extract_erlang_string_literal(node: Node, source: &str) -> Option<String> {
3207 if node.kind() != "string" {
3208 return None;
3209 }
3210 let raw = node_text(node, source)?.trim();
3211 if raw.starts_with('"') && raw.ends_with('"') && raw.len() >= 2 {
3213 return Some(raw[1..raw.len() - 1].to_string());
3214 }
3215 let first = raw.find('"')?;
3218 let last = raw.rfind('"')?;
3219 if last > first {
3220 return Some(raw[first + 1..last].to_string());
3221 }
3222 None
3223}
3224
3225fn guess_erlang_file_path_from_module(current_path: &str, module_name: &str) -> String {
3228 let base = Path::new(current_path)
3229 .parent()
3230 .map(|p| p.to_path_buf())
3231 .unwrap_or_else(|| Path::new(".").to_path_buf());
3232 path_str_slash(&base.join(format!("{module}.erl", module = module_name)))
3233}
3234
3235fn node_byte_span(node: Node) -> (usize, usize) {
3236 (
3237 node.start_byte() as usize,
3238 node.end_byte() as usize,
3239 )
3240}
3241
/// Records `span` under `key`, widening any span already stored there.
///
/// For an existing entry the start becomes the smaller of the two starts and
/// the end the larger of the two ends; otherwise `span` is inserted as is.
fn insert_widest_span(map: &mut HashMap<String, (usize, usize)>, key: String, span: (usize, usize)) {
    let (start, end) = span;
    match map.get_mut(&key) {
        Some((lo, hi)) => {
            *lo = (*lo).min(start);
            *hi = (*hi).max(end);
        }
        None => {
            map.insert(key, span);
        }
    }
}
3250
3251fn extract_java_class_spans(tree: &Tree, source: &str, package: Option<&str>) -> HashMap<String, (usize, usize)> {
3252 let mut out = HashMap::new();
3253 walk_tree(tree.root_node(), |node| {
3254 if node.kind() != "class_declaration" && node.kind() != "interface_declaration" {
3255 return;
3256 }
3257 let Some(name) = identifier_text_from_children(node, source) else {
3258 return;
3259 };
3260 let fqn = package
3261 .map(|pkg| format!("{pkg}.{name}"))
3262 .unwrap_or(name);
3263 insert_widest_span(&mut out, fqn, node_byte_span(node));
3264 });
3265 out
3266}
3267
3268fn extract_java_method_body_spans(
3269 tree: &Tree,
3270 source: &str,
3271 package: Option<&str>,
3272) -> HashMap<String, (usize, usize)> {
3273 let mut out = HashMap::new();
3274 walk_tree(tree.root_node(), |node| {
3275 if node.kind() != "method_declaration" {
3276 return;
3277 }
3278 let Some(method_name) = identifier_text_from_children(node, source) else {
3279 return;
3280 };
3281 let mut parent = node.parent();
3282 let mut class_fqn: Option<String> = None;
3283 while let Some(p) = parent {
3284 let pk = p.kind();
3285 if pk == "class_declaration" || pk == "interface_declaration" {
3286 if let Some(class_name) = identifier_text_from_children(p, source) {
3287 class_fqn = Some(
3288 package
3289 .map(|pkg| format!("{pkg}.{class_name}"))
3290 .unwrap_or(class_name),
3291 );
3292 }
3293 break;
3294 }
3295 parent = p.parent();
3296 }
3297 let fqn = if let Some(ref cls) = class_fqn {
3298 format!("{cls}.{method_name}")
3299 } else if let Some(pkg) = package {
3300 format!("{pkg}.{method_name}")
3301 } else {
3302 method_name.clone()
3303 };
3304 let span = node
3305 .child_by_field_name("body")
3306 .map(node_byte_span)
3307 .unwrap_or_else(|| node_byte_span(node));
3308 insert_widest_span(&mut out, fqn, span);
3309 });
3310 out
3311}
3312
3313fn extract_csharp_class_spans(tree: &Tree, source: &str) -> HashMap<String, (usize, usize)> {
3314 let mut out = HashMap::new();
3315 let namespace = extract_csharp_namespace(tree, source);
3316 walk_tree(tree.root_node(), |node| {
3317 match node.kind() {
3318 "class_declaration"
3319 | "interface_declaration"
3320 | "struct_declaration"
3321 | "enum_declaration"
3322 | "record_declaration"
3323 | "record_struct_declaration" => {
3324 if let Some(fqn) = csharp_fqn_for_type_declaration(node, source, namespace.as_deref()) {
3325 insert_widest_span(&mut out, fqn, node_byte_span(node));
3326 }
3327 }
3328 _ => {}
3329 }
3330 });
3331 out
3332}
3333
3334fn extract_csharp_property_spans(tree: &Tree, source: &str) -> HashMap<String, (usize, usize)> {
3335 let mut out = HashMap::new();
3336 let namespace = extract_csharp_namespace(tree, source);
3337 walk_tree(tree.root_node(), |node| {
3338 if node.kind() != "property_declaration" {
3339 return;
3340 }
3341 let Some(class_fqn) = csharp_enclosing_type_fqn(node, source, namespace.as_deref()) else {
3342 return;
3343 };
3344 let Some(prop_name_node) = node.child_by_field_name("name") else {
3345 return;
3346 };
3347 let Some(prop_name) = csharp_node_text(prop_name_node, source).filter(|s| !s.is_empty()) else {
3348 return;
3349 };
3350 let fqn = format!("{class_fqn}.{prop_name}");
3351 insert_widest_span(&mut out, fqn, node_byte_span(node));
3352 });
3353 out
3354}
3355
3356fn extract_csharp_method_body_spans_map(
3357 tree: &Tree,
3358 source: &str,
3359 namespace: Option<&str>,
3360) -> HashMap<String, (usize, usize)> {
3361 csharp_method_body_spans(tree, source, namespace)
3362 .into_iter()
3363 .map(|(fqn, lo, hi)| (fqn, (lo, hi)))
3364 .collect()
3365}
3366
3367fn extract_erlang_function_spans(
3368 module_name: &str,
3369 tree: &Tree,
3370 source: &str,
3371) -> HashMap<String, (usize, usize)> {
3372 let mut out = HashMap::new();
3373 walk_tree(tree.root_node(), |node| {
3374 if node.kind() != "function_clause" || !is_top_level_erlang_function_clause(node) {
3375 return;
3376 }
3377 let Some(name_node) = node.child_by_field_name("name") else {
3378 return;
3379 };
3380 let Some(fun_name) = extract_erlang_name(name_node, source) else {
3381 return;
3382 };
3383 let Some(args_node) = node.child_by_field_name("args") else {
3384 return;
3385 };
3386 let arity = args_node.named_child_count() as u32;
3387 let fqn = format!("{module_name}:{fun_name}/{arity}");
3388 insert_widest_span(&mut out, fqn, node_byte_span(node));
3389 });
3390 out
3391}
3392
3393fn extract_go_class_spans(
3394 tree: &Tree,
3395 source: &str,
3396 package: Option<&str>,
3397) -> HashMap<String, (usize, usize)> {
3398 let mut out = HashMap::new();
3399 walk_tree(tree.root_node(), |node| {
3400 if node.kind() != "type_declaration" {
3401 return;
3402 }
3403 let mut i = 0usize;
3404 while let Some(child) = node.child(i) {
3405 i += 1;
3406 if child.kind() != "type_spec" {
3407 continue;
3408 }
3409 let Some(type_n) = child.child_by_field_name("type") else {
3410 continue;
3411 };
3412 if type_n.kind() != "struct_type" && type_n.kind() != "interface_type" {
3413 continue;
3414 }
3415 let Some(name_node) = child.child_by_field_name("name") else {
3416 continue;
3417 };
3418 let start = name_node.start_byte() as usize;
3419 let end = (name_node.end_byte() as usize).min(source.len());
3420 if start >= end {
3421 continue;
3422 }
3423 let name = source[start..end].to_string();
3424 let fqn = package
3425 .map(|pkg| format!("{pkg}.{name}"))
3426 .unwrap_or(name);
3427 insert_widest_span(&mut out, fqn, node_byte_span(child));
3428 }
3429 });
3430 out
3431}
3432
3433fn extract_go_function_body_spans(
3434 tree: &Tree,
3435 source: &str,
3436 package: Option<&str>,
3437) -> HashMap<String, (usize, usize)> {
3438 let mut out = HashMap::new();
3439 walk_tree(tree.root_node(), |node| {
3440 let kind = node.kind();
3441 if kind != "function_declaration" && kind != "method_declaration" {
3442 return;
3443 }
3444 let fqn = match kind {
3445 "function_declaration" => go_decl_fqn_from_function_declaration(node, source, package),
3446 "method_declaration" => go_decl_fqn_from_method_declaration(node, source, package),
3447 _ => None,
3448 };
3449 let Some(fqn) = fqn else {
3450 return;
3451 };
3452 let span = node
3453 .child_by_field_name("body")
3454 .map(node_byte_span)
3455 .unwrap_or_else(|| node_byte_span(node));
3456 insert_widest_span(&mut out, fqn, span);
3457 });
3458 out
3459}
3460
3461fn extract_non_java_function_body_spans(
3462 file: &ParsedFile,
3463 source: &str,
3464 file_path: &str,
3465) -> HashMap<String, (usize, usize)> {
3466 let mut out = HashMap::new();
3467 match file.language {
3468 LanguageId::Python => {
3469 walk_tree(file.tree.root_node(), |node| {
3470 if node.kind() != "function_definition" || python_node_inside_class(node) {
3471 return;
3472 }
3473 let Some(logical) = python_function_logical_name(node, source) else {
3474 return;
3475 };
3476 let fqn = non_java_file_scoped_fqn(file_path, &logical);
3477 let span = node
3478 .child_by_field_name("body")
3479 .map(node_byte_span)
3480 .unwrap_or_else(|| node_byte_span(node));
3481 insert_widest_span(&mut out, fqn, span);
3482 });
3483 }
3484 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
3485 walk_tree(file.tree.root_node(), |node| {
3486 if node.kind() != "function_declaration" {
3487 return;
3488 }
3489 let Some(name) = js_ts_function_name(node, source) else {
3490 return;
3491 };
3492 let fqn = non_java_file_scoped_fqn(file_path, &name);
3493 let span = node
3494 .child_by_field_name("body")
3495 .map(node_byte_span)
3496 .unwrap_or_else(|| node_byte_span(node));
3497 insert_widest_span(&mut out, fqn, span);
3498 });
3499 }
3500 LanguageId::Rust => {
3501 walk_tree(file.tree.root_node(), |node| {
3502 if node.kind() != "function_item" || rust_inside_impl(node) {
3503 return;
3504 }
3505 let Some(logical) = rust_function_logical_name(node, source) else {
3506 return;
3507 };
3508 let fqn = non_java_file_scoped_fqn(file_path, &logical);
3509 let span = node
3510 .child_by_field_name("body")
3511 .map(node_byte_span)
3512 .unwrap_or_else(|| node_byte_span(node));
3513 insert_widest_span(&mut out, fqn, span);
3514 });
3515 }
3516 _ => {}
3517 }
3518 out
3519}
3520
3521fn js_ts_function_name(node: Node, source: &str) -> Option<String> {
3522 node.child_by_field_name("name")
3523 .and_then(|n| node_text_slice(n, source))
3524 .map(|s| s.trim().to_string())
3525 .filter(|s| !s.is_empty())
3526}
3527
3528fn rust_function_name(node: Node, source: &str) -> Option<String> {
3529 node.child_by_field_name("name")
3530 .and_then(|n| node_text_slice(n, source))
3531 .map(|s| s.trim().to_string())
3532 .filter(|s| !s.is_empty())
3533}
3534
3535async fn code_bytes_for_span(
3536 compressor: Option<&CompressorClient>,
3537 source: &str,
3538 span: Option<(usize, usize)>,
3539 language: LanguageId,
3540) -> Option<Vec<u8>> {
3541 let client = compressor?;
3542 compress_snippet(source, span, language, client).await
3543}
3544
3545fn walk_tree(root: Node, mut f: impl FnMut(Node)) {
3547 let mut stack = vec![root];
3548 while let Some(node) = stack.pop() {
3549 f(node);
3550 let child_count = node.child_count();
3551 for i in (0..child_count).rev() {
3552 if let Some(child) = node.child(i) {
3553 stack.push(child);
3554 }
3555 }
3556 }
3557}
3558
3559fn walk_tree_skip_local_functions(root: Node, mut f: impl FnMut(Node)) {
3562 let mut stack = vec![root];
3563 while let Some(node) = stack.pop() {
3564 f(node);
3565 if node.kind() == "local_function_statement" {
3566 continue;
3567 }
3568 let child_count = node.child_count();
3569 for i in (0..child_count).rev() {
3570 if let Some(child) = node.child(i) {
3571 stack.push(child);
3572 }
3573 }
3574 }
3575}
3576
3577fn identifier_text_from_children(node: Node, source: &str) -> Option<String> {
3579 let child_count = node.child_count();
3580 for i in 0..child_count {
3581 if let Some(child) = node.child(i) {
3582 let kind = child.kind();
3583 if kind == "identifier" || kind == "name" {
3584 let start = child.start_byte() as usize;
3585 let end = child.end_byte() as usize;
3586 if end <= source.len() && start < end {
3587 return Some(source[start..end].to_string());
3588 }
3589 }
3590 }
3591 }
3592 None
3593}
3594
3595fn extract_java_symbols(
3597 tree: &Tree,
3598 source: &str,
3599 package: Option<&str>,
3600) -> (Vec<ClassSymbol>, Vec<FunctionSymbol>) {
3601 let mut classes: Vec<ClassSymbol> = Vec::new();
3602 let mut methods: Vec<FunctionSymbol> = Vec::new();
3603 let mut seen_class_fqns: HashSet<String> = HashSet::new();
3604
3605 let root = tree.root_node();
3606 walk_tree(root, |node| {
3607 let kind = node.kind();
3608 match kind {
3609 "class_declaration" | "interface_declaration" => {
3610 if let Some(name) = identifier_text_from_children(node, source) {
3611 let fqn = if let Some(pkg) = package {
3612 format!("{pkg}.{name}")
3613 } else {
3614 name.clone()
3615 };
3616 if seen_class_fqns.insert(fqn.clone()) {
3617 classes.push(ClassSymbol {
3618 name,
3619 fqn,
3620 kind: None,
3621 });
3622 }
3623 }
3624 }
3625 "method_declaration" => {
3626 if let Some(method_name) = identifier_text_from_children(node, source) {
3627 let mut parent = node.parent();
3629 let mut class_fqn: Option<String> = None;
3630 while let Some(p) = parent {
3631 let pk = p.kind();
3632 if pk == "class_declaration" || pk == "interface_declaration" {
3633 if let Some(class_name) = identifier_text_from_children(p, source) {
3634 let full = if let Some(pkg) = package {
3635 format!("{pkg}.{class_name}")
3636 } else {
3637 class_name
3638 };
3639 class_fqn = Some(full);
3640 }
3641 break;
3642 }
3643 parent = p.parent();
3644 }
3645
3646 let fqn = if let Some(ref cls) = class_fqn {
3647 format!("{cls}.{}", method_name)
3648 } else if let Some(pkg) = package {
3649 format!("{pkg}.{}", method_name)
3650 } else {
3651 method_name.clone()
3652 };
3653
3654 let (return_type, param_types, param_count) =
3655 extract_java_method_signature_types(node, source);
3656
3657 methods.push(FunctionSymbol {
3658 name: method_name,
3659 fqn,
3660 class_fqn,
3661 return_type,
3662 param_types,
3663 param_count,
3664 modifiers: Vec::new(),
3665 is_pointer_receiver: None,
3666 });
3667 }
3668 }
3669 _ => {}
3670 }
3671 });
3672
3673 (classes, methods)
3674}
3675
3676fn extract_java_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
3678 let mut out = Vec::new();
3679 let root = tree.root_node();
3680 walk_tree(root, |node| {
3681 if node.kind() == "ERROR" || node.is_missing() {
3682 let pos = node.start_position();
3683 let s = node.start_byte() as usize;
3684 let e = (node.end_byte() as usize).min(source.len());
3685 let snippet = if s < e {
3686 source[s..e]
3687 .chars()
3688 .take(120)
3689 .collect::<String>()
3690 .replace('\n', " ")
3691 } else {
3692 String::new()
3693 };
3694 out.push((pos.row + 1, pos.column + 1, snippet));
3695 }
3696 });
3697 out
3698}
3699
/// Reduces a Java type's display text to a shorter name.
///
/// Everything from the first `<` onwards is dropped and the last whitespace-separated
/// token of the remainder is returned. If nothing remains, the trimmed input is returned
/// unchanged.
fn java_simple_type_name(type_text: &str) -> String {
    let trimmed = type_text.trim();
    let before_generics = match trimmed.find('<') {
        Some(angle) => &trimmed[..angle],
        None => trimmed,
    };
    match before_generics.split_whitespace().next_back() {
        Some(token) => token.to_string(),
        None => trimmed.to_string(),
    }
}
3713
3714fn java_type_node_display(node: Node, source: &str) -> Option<String> {
3716 let s = node.start_byte() as usize;
3717 let e = (node.end_byte() as usize).min(source.len());
3718 if s >= e {
3719 return None;
3720 }
3721 Some(source[s..e].trim().to_string())
3722}
3723
3724fn extract_java_method_signature_types(
3726 method: Node,
3727 source: &str,
3728) -> (Option<String>, Vec<String>, usize) {
3729 let return_type = method
3730 .child_by_field_name("type")
3731 .and_then(|n| java_type_node_display(n, source))
3732 .map(|full| java_simple_type_name(&full));
3733
3734 let mut param_types: Vec<String> = Vec::new();
3735 if let Some(params) = method.child_by_field_name("parameters") {
3736 let mut i = 0usize;
3737 while let Some(child) = params.child(i) {
3738 i += 1;
3739 if child.kind() == "formal_parameter" || child.kind() == "spread_parameter" {
3740 if let Some(tn) = child.child_by_field_name("type") {
3741 if let Some(full) = java_type_node_display(tn, source) {
3742 param_types.push(java_simple_type_name(&full));
3743 }
3744 }
3745 }
3746 }
3747 }
3748
3749 let param_count = param_types.len();
3750 (return_type, param_types, param_count)
3751}
3752
3753fn extract_java_inheritance_edges(
3755 tree: &Tree,
3756 source: &str,
3757 package: Option<&str>,
3758) -> Vec<(String, String)> {
3759 let mut edges = Vec::new();
3760 let import_map = build_internal_import_map(source);
3761 let root = tree.root_node();
3762
3763 walk_tree(root, |node| {
3764 if node.kind() != "class_declaration" {
3765 return;
3766 }
3767 let Some(class_name) = identifier_text_from_children(node, source) else {
3768 return;
3769 };
3770 let child_fqn = match package {
3771 Some(pkg) => format!("{pkg}.{class_name}"),
3772 None => class_name,
3773 };
3774
3775 let resolve = |type_node: Node| {
3776 java_resolve_type_fqn(type_node, source, package, &import_map)
3777 };
3778
3779 if let Some(super_c) = node.child_by_field_name("superclass") {
3780 if let Some(p) = resolve(super_c) {
3781 edges.push((child_fqn.clone(), p));
3782 }
3783 }
3784 if let Some(ifs) = node.child_by_field_name("interfaces") {
3785 let mut i = 0usize;
3786 while let Some(ch) = ifs.child(i) {
3787 i += 1;
3788 if ch.kind() == "type_list" {
3789 let mut j = 0usize;
3790 while let Some(t) = ch.child(j) {
3791 j += 1;
3792 if let Some(p) = resolve(t) {
3793 edges.push((child_fqn.clone(), p));
3794 }
3795 }
3796 }
3797 }
3798 }
3799 });
3800
3801 edges
3802}
3803
3804fn java_resolve_type_fqn(
3805 type_node: Node,
3806 source: &str,
3807 package: Option<&str>,
3808 import_map: &HashMap<String, String>,
3809) -> Option<String> {
3810 let s = type_node.start_byte() as usize;
3811 let e = (type_node.end_byte() as usize).min(source.len());
3812 if s < e && type_node.kind() == "scoped_type_identifier" {
3813 let text = source[s..e].trim();
3814 if text.contains('.') && !text.is_empty() {
3815 return Some(text.to_string());
3816 }
3817 }
3818
3819 let simple: String = match type_node.kind() {
3820 "integral_type" | "floating_point_type" | "boolean_type" | "void_type" => {
3821 if s >= e {
3822 return None;
3823 }
3824 source[s..e].trim().to_string()
3825 }
3826 _ => type_node
3827 .child_by_field_name("name")
3828 .and_then(|n| {
3829 let s = n.start_byte() as usize;
3830 let e = (n.end_byte() as usize).min(source.len());
3831 if s < e {
3832 Some(source[s..e].to_string())
3833 } else {
3834 None
3835 }
3836 })
3837 .or_else(|| {
3838 let mut found: Option<String> = None;
3839 walk_tree(type_node, |n| {
3840 if found.is_some() {
3841 return;
3842 }
3843 if n.kind() == "type_identifier" {
3844 let s = n.start_byte() as usize;
3845 let e = (n.end_byte() as usize).min(source.len());
3846 if s < e {
3847 found = Some(source[s..e].to_string());
3848 }
3849 }
3850 });
3851 found
3852 })?,
3853 };
3854
3855 if is_java_primitive_or_builtin(&simple) {
3856 return None;
3857 }
3858 Some(
3859 import_map
3860 .get(&simple)
3861 .cloned()
3862 .unwrap_or_else(|| match package {
3863 Some(pkg) => format!("{pkg}.{simple}"),
3864 None => simple,
3865 }),
3866 )
3867}
3868
3869fn extract_java_class_annotations(
3871 tree: &Tree,
3872 source: &str,
3873 package: Option<&str>,
3874) -> Vec<(String, Vec<String>)> {
3875 let mut out = Vec::new();
3876 let root = tree.root_node();
3877 walk_tree(root, |node| {
3878 if node.kind() != "class_declaration" && node.kind() != "interface_declaration" {
3879 return;
3880 }
3881 let Some(class_name) = identifier_text_from_children(node, source) else {
3882 return;
3883 };
3884 let class_fqn = match package {
3885 Some(pkg) => format!("{pkg}.{class_name}"),
3886 None => class_name,
3887 };
3888 let anns = java_modifiers_annotations(node, source);
3889 if !anns.is_empty() {
3890 out.push((class_fqn, anns));
3891 }
3892 });
3893 out
3894}
3895
3896fn extract_java_method_annotations(
3898 tree: &Tree,
3899 source: &str,
3900 package: Option<&str>,
3901) -> Vec<(String, Vec<String>)> {
3902 let mut out = Vec::new();
3903 let root = tree.root_node();
3904 walk_tree(root, |node| {
3905 if node.kind() != "method_declaration" {
3906 return;
3907 }
3908 let Some(method_name) = identifier_text_from_children(node, source) else {
3909 return;
3910 };
3911 let mut parent = node.parent();
3912 let mut class_fqn: Option<String> = None;
3913 while let Some(p) = parent {
3914 let pk = p.kind();
3915 if pk == "class_declaration" || pk == "interface_declaration" {
3916 if let Some(class_name) = identifier_text_from_children(p, source) {
3917 class_fqn = Some(match package {
3918 Some(pkg) => format!("{pkg}.{class_name}"),
3919 None => class_name,
3920 });
3921 }
3922 break;
3923 }
3924 parent = p.parent();
3925 }
3926 let fqn = match &class_fqn {
3927 Some(cls) => format!("{cls}.{method_name}"),
3928 None => match package {
3929 Some(pkg) => format!("{pkg}.{method_name}"),
3930 None => method_name.clone(),
3931 },
3932 };
3933 let anns = java_modifiers_annotations(node, source);
3934 if !anns.is_empty() {
3935 out.push((fqn, anns));
3936 }
3937 });
3938 out
3939}
3940
3941fn java_modifiers_annotations(node: Node, source: &str) -> Vec<String> {
3942 let mut names = Vec::new();
3943 let mut i = 0usize;
3944 while let Some(ch) = node.child(i) {
3945 i += 1;
3946 if ch.kind() == "modifiers" {
3947 let mut j = 0usize;
3948 while let Some(m) = ch.child(j) {
3949 j += 1;
3950 if m.kind() == "marker_annotation" || m.kind() == "annotation" {
3951 if let Some(n) = m.named_child(0) {
3952 if n.kind() == "identifier" || n.kind() == "scoped_identifier" {
3953 let s = n.start_byte() as usize;
3954 let e = (n.end_byte() as usize).min(source.len());
3955 if s < e {
3956 let raw = source[s..e].to_string();
3957 let simple = raw.rsplit('.').next().unwrap_or(&raw).to_string();
3958 names.push(simple);
3959 }
3960 }
3961 }
3962 }
3963 }
3964 }
3965 }
3966 names
3967}
3968
3969fn java_type_has_autowired(type_node: Node, source: &str) -> bool {
3970 let mut found = false;
3971 walk_tree(type_node, |n| {
3972 if n.kind() == "modifiers" {
3973 let mut j = 0usize;
3974 while let Some(m) = n.child(j) {
3975 j += 1;
3976 if m.kind() == "marker_annotation" || m.kind() == "annotation" {
3977 if let Some(id) = m.named_child(0) {
3978 let s = id.start_byte() as usize;
3979 let e = (id.end_byte() as usize).min(source.len());
3980 if s < e {
3981 let raw = &source[s..e];
3982 if raw == "Autowired" || raw.ends_with(".Autowired") {
3983 found = true;
3984 }
3985 }
3986 }
3987 }
3988 }
3989 }
3990 });
3991 found
3992}
3993
3994fn extract_java_injected_dependencies(
3996 tree: &Tree,
3997 source: &str,
3998 package: Option<&str>,
3999) -> Vec<(String, String)> {
4000 let mut out = Vec::new();
4001 let import_map = build_internal_import_map(source);
4002 let root = tree.root_node();
4003
4004 walk_tree(root, |node| {
4005 if node.kind() != "class_declaration" {
4006 return;
4007 }
4008 let Some(class_name) = identifier_text_from_children(node, source) else {
4009 return;
4010 };
4011 let class_fqn = match package {
4012 Some(pkg) => format!("{pkg}.{class_name}"),
4013 None => class_name,
4014 };
4015
4016 let mut i = 0usize;
4017 while let Some(body) = node.child(i) {
4018 i += 1;
4019 if body.kind() != "class_body" {
4020 continue;
4021 }
4022 let mut j = 0usize;
4023 while let Some(member) = body.child(j) {
4024 j += 1;
4025 match member.kind() {
4026 "constructor_declaration" => {
4027 if let Some(params) = member.child_by_field_name("parameters") {
4028 let mut k = 0usize;
4029 while let Some(p) = params.child(k) {
4030 k += 1;
4031 if p.kind() == "formal_parameter" {
4032 if let Some(tn) = p.child_by_field_name("type") {
4033 if let Some(fqn) =
4034 java_resolve_type_fqn(tn, source, package, &import_map)
4035 {
4036 out.push((class_fqn.clone(), fqn));
4037 }
4038 }
4039 }
4040 }
4041 }
4042 }
4043 "field_declaration" => {
4044 if !java_type_has_autowired(member, source) {
4045 continue;
4046 }
4047 let type_node = member
4048 .child_by_field_name("type")
4049 .or_else(|| {
4050 let mut k = 0usize;
4051 while let Some(ch) = member.child(k) {
4052 if matches!(
4053 ch.kind(),
4054 "type_identifier"
4055 | "generic_type"
4056 | "array_type"
4057 | "integral_type"
4058 | "floating_point_type"
4059 | "boolean_type"
4060 | "void_type"
4061 ) {
4062 return Some(ch);
4063 }
4064 k += 1;
4065 }
4066 None
4067 });
4068 if let Some(tn) = type_node {
4069 if let Some(fqn) = java_resolve_type_fqn(tn, source, package, &import_map)
4070 {
4071 out.push((class_fqn.clone(), fqn));
4072 }
4073 }
4074 }
4075 _ => {}
4076 }
4077 }
4078 }
4079 });
4080
4081 out
4082}
4083
/// Text-based fallback that finds the first C# namespace name by scanning source lines.
///
/// A line qualifies when, after trimming, it starts with the `namespace` keyword followed
/// by at least one whitespace character (space, tab, ...). Any amount of whitespace between
/// the keyword and the name is accepted. The name ends at the first `{`, `;` or whitespace.
/// Lines that yield an empty name are skipped and scanning continues.
///
/// Returns `None` when no line declares a namespace.
fn extract_csharp_namespace_line_fallback(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("namespace")?;
        // Require whitespace after the keyword so identifiers like `namespaces` do not match.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        // Skip all separating whitespace; previously a second space produced an empty name.
        let name = rest
            .trim_start()
            .split(|c: char| c == '{' || c == ';' || c.is_whitespace())
            .next()
            .unwrap_or("");
        (!name.is_empty()).then(|| name.to_string())
    })
}
4101
4102fn extract_csharp_namespace_from_ast(tree: &Tree, source: &str) -> Option<String> {
4104 let root = tree.root_node();
4105 for i in 0..root.named_child_count() {
4106 let c = root.named_child(i)?;
4107 if c.kind() == "file_scoped_namespace_declaration" {
4108 return csharp_namespace_declaration_name(c, source);
4109 }
4110 }
4111 for i in 0..root.named_child_count() {
4112 let c = root.named_child(i)?;
4113 if c.kind() == "namespace_declaration" {
4114 return csharp_namespace_declaration_name(c, source);
4115 }
4116 }
4117 None
4118}
4119
4120fn csharp_namespace_declaration_name(decl: Node, source: &str) -> Option<String> {
4122 if !matches!(
4123 decl.kind(),
4124 "namespace_declaration" | "file_scoped_namespace_declaration"
4125 ) {
4126 return None;
4127 }
4128 decl.child_by_field_name("name")
4129 .and_then(|n| csharp_node_text(n, source))
4130 .filter(|s| !s.is_empty())
4131}
4132
4133fn csharp_enclosing_namespace_prefix(node: Node, source: &str) -> Option<String> {
4135 let mut segments: Vec<String> = Vec::new();
4136 let mut cur = node.parent();
4137 while let Some(p) = cur {
4138 if matches!(
4139 p.kind(),
4140 "namespace_declaration" | "file_scoped_namespace_declaration"
4141 ) {
4142 if let Some(name) = csharp_namespace_declaration_name(p, source) {
4143 segments.push(name);
4144 }
4145 }
4146 cur = p.parent();
4147 }
4148 segments.reverse();
4149 if segments.is_empty() {
4150 None
4151 } else {
4152 Some(segments.join("."))
4153 }
4154}
4155
4156fn collect_csharp_file_namespace_strings(tree: &Tree, source: &str) -> Vec<String> {
4158 let mut seen: HashSet<String> = HashSet::new();
4159 let root = tree.root_node();
4160 walk_tree(root, |n| {
4161 if matches!(
4162 n.kind(),
4163 "class_declaration"
4164 | "interface_declaration"
4165 | "struct_declaration"
4166 | "enum_declaration"
4167 | "record_declaration"
4168 | "record_struct_declaration"
4169 ) {
4170 if let Some(p) = csharp_enclosing_namespace_prefix(n, source) {
4171 seen.insert(p);
4172 }
4173 }
4174 });
4175 if seen.is_empty() {
4176 if let Some(p) = extract_csharp_namespace_from_ast(tree, source) {
4177 seen.insert(p);
4178 } else if let Some(p) = extract_csharp_namespace_line_fallback(source) {
4179 seen.insert(p);
4180 }
4181 }
4182 let mut v: Vec<String> = seen.into_iter().collect();
4183 v.sort();
4184 v
4185}
4186
4187fn extract_csharp_namespace(tree: &Tree, source: &str) -> Option<String> {
4189 extract_csharp_namespace_from_ast(tree, source).or_else(|| extract_csharp_namespace_line_fallback(source))
4190}
4191
/// Classification of a single C# `using` directive.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CSharpUsingKind {
    /// Plain `using <namespace>;` directive, holding the namespace text.
    Namespace(String),
    /// `using static <target>;` directive, holding the target text.
    Static(String),
    /// `using <alias> = <target>;` directive.
    Alias { alias: String, target: String },
}
4202
/// Summary of the `using` directives collected from one C# file.
#[derive(Debug, Clone, Default)]
struct CSharpUsingSummary {
    /// Imported namespace names.
    namespace_imports: Vec<String>,
    /// Maps each alias name to its target text.
    alias_map: HashMap<String, String>,
}
4209
/// Removes every occurrence of `global::` from `s` and trims surrounding whitespace.
fn normalize_csharp_global_prefix(s: &str) -> String {
    // Splitting on the marker and re-joining the pieces drops each occurrence.
    let without_global: String = s.split("global::").collect();
    without_global.trim().to_owned()
}
4213
/// Reports whether the first dotted segment of `ns` is exactly `System` or `Microsoft`.
///
/// Surrounding whitespace and a single leading `global::` are ignored.
fn is_csharp_system_or_microsoft_namespace(ns: &str) -> bool {
    let trimmed = ns.trim();
    let bare = trimmed.strip_prefix("global::").unwrap_or(trimmed).trim();
    let root_segment = bare.split('.').next();
    matches!(root_segment, Some("System" | "Microsoft"))
}
4223
4224fn parse_csharp_using_directive(node: Node, source: &str) -> Option<CSharpUsingKind> {
4226 let start = node.start_byte() as usize;
4227 let end = node.end_byte() as usize;
4228 let raw = source.get(start..end)?.trim();
4229 let mut body = raw.strip_suffix(';')?.trim();
4230
4231 if let Some(rest) = body.strip_prefix("global") {
4232 if rest.starts_with(char::is_whitespace) {
4233 body = rest.trim_start();
4234 }
4235 }
4236
4237 body = body.strip_prefix("using")?.trim();
4238
4239 if let Some(rest) = body.strip_prefix("static") {
4240 if rest.starts_with(char::is_whitespace) {
4241 let target = normalize_csharp_global_prefix(rest.trim_start());
4242 return if target.is_empty() {
4243 None
4244 } else {
4245 Some(CSharpUsingKind::Static(target))
4246 };
4247 }
4248 }
4249
4250 if let Some(eq_pos) = body.find('=') {
4251 let left = body[..eq_pos].trim();
4252 let right = body[eq_pos + 1..].trim();
4253 if !left.is_empty()
4254 && !right.is_empty()
4255 && left.chars().all(|c| c.is_alphanumeric() || c == '_')
4256 {
4257 return Some(CSharpUsingKind::Alias {
4258 alias: left.to_string(),
4259 target: normalize_csharp_global_prefix(right),
4260 });
4261 }
4262 }
4263
4264 let ns = normalize_csharp_global_prefix(body);
4265 if ns.is_empty() {
4266 None
4267 } else {
4268 Some(CSharpUsingKind::Namespace(ns))
4269 }
4270}
4271
4272fn extract_csharp_using_summary(tree: &Tree, source: &str) -> CSharpUsingSummary {
4274 let mut namespace_imports: Vec<String> = Vec::new();
4275 let mut seen_ns: HashSet<String> = HashSet::new();
4276 let mut alias_map: HashMap<String, String> = HashMap::new();
4277
4278 walk_tree(tree.root_node(), |node| {
4279 if node.kind() != "using_directive" {
4280 return;
4281 }
4282 let Some(kind) = parse_csharp_using_directive(node, source) else {
4283 return;
4284 };
4285 match kind {
4286 CSharpUsingKind::Namespace(ns) => {
4287 if !is_csharp_system_or_microsoft_namespace(&ns) && seen_ns.insert(ns.clone()) {
4288 namespace_imports.push(ns);
4289 }
4290 }
4291 CSharpUsingKind::Static(_) => {}
4292 CSharpUsingKind::Alias { alias, target } => {
4293 alias_map.insert(alias, target);
4294 }
4295 }
4296 });
4297
4298 CSharpUsingSummary {
4299 namespace_imports,
4300 alias_map,
4301 }
4302}
4303
/// Lookup tables built over the C# files of one batch (see `build_csharp_batch_index`).
#[derive(Debug, Default, Clone)]
struct CSharpBatchIndex {
    /// Namespace name mapped to the path strings of files that declare types in it.
    namespace_to_paths: HashMap<String, Vec<String>>,
    /// Class simple name mapped to the FQNs of all classes carrying that name.
    simple_name_to_fqns: HashMap<String, Vec<String>>,
    /// FQNs of all classes found in the batch.
    class_fqns: HashSet<String>,
}
4311
4312fn build_csharp_batch_index(files: &[ParsedFile], root: &Path) -> CSharpBatchIndex {
4313 let mut namespace_to_paths: HashMap<String, Vec<String>> = HashMap::new();
4314 let mut simple_name_to_fqns: HashMap<String, Vec<String>> = HashMap::new();
4315 let mut class_fqns: HashSet<String> = HashSet::new();
4316
4317 for file in files {
4318 if file.language != LanguageId::CSharp {
4319 continue;
4320 }
4321 let path_str = neo4j_path_string(root, &file.path);
4322 let source = &file.source;
4323 let tree = &file.tree;
4324 for ns in collect_csharp_file_namespace_strings(tree, source) {
4325 namespace_to_paths
4326 .entry(ns)
4327 .or_default()
4328 .push(path_str.clone());
4329 }
4330 let (classes, _, _) = extract_csharp_symbols(tree, source);
4331 for c in classes {
4332 class_fqns.insert(c.fqn.clone());
4333 simple_name_to_fqns
4334 .entry(c.name.clone())
4335 .or_default()
4336 .push(c.fqn.clone());
4337 }
4338 }
4339
4340 for v in namespace_to_paths.values_mut() {
4341 v.sort();
4342 v.dedup();
4343 }
4344 for v in simple_name_to_fqns.values_mut() {
4345 v.sort();
4346 v.dedup();
4347 }
4348
4349 CSharpBatchIndex {
4350 namespace_to_paths,
4351 simple_name_to_fqns,
4352 class_fqns,
4353 }
4354}
4355
4356fn csharp_effective_import_namespaces(
4358 namespace_imports: &[String],
4359 alias_map: &HashMap<String, String>,
4360 class_fqns: &HashSet<String>,
4361) -> Vec<String> {
4362 let mut out: Vec<String> = namespace_imports.to_vec();
4363 for target in alias_map.values() {
4364 if !class_fqns.contains(target) && !is_csharp_system_or_microsoft_namespace(target) {
4365 out.push(target.clone());
4366 }
4367 }
4368 out.sort();
4369 out.dedup();
4370 out
4371}
4372
4373fn resolve_csharp_type_fqn(
4375 simple: &str,
4376 current_ns: Option<&str>,
4377 namespace_imports: &[String],
4378 alias_map: &HashMap<String, String>,
4379 index: &CSharpBatchIndex,
4380) -> Option<String> {
4381 if let Some(target) = alias_map.get(simple) {
4382 if index.class_fqns.contains(target) {
4383 return Some(target.clone());
4384 }
4385 }
4386
4387 let imports = csharp_effective_import_namespaces(namespace_imports, alias_map, &index.class_fqns);
4388
4389 let candidates: Vec<String> = index
4390 .simple_name_to_fqns
4391 .get(simple)
4392 .cloned()
4393 .unwrap_or_default();
4394
4395 let mut filtered: Vec<String> = candidates
4396 .into_iter()
4397 .filter(|fqn| {
4398 let Some((decl_ns, base_name)) = fqn.rsplit_once('.') else {
4399 return false;
4400 };
4401 if base_name != simple {
4402 return false;
4403 }
4404 if Some(decl_ns) == current_ns {
4405 return true;
4406 }
4407 imports.iter().any(|im| im.as_str() == decl_ns)
4408 })
4409 .collect();
4410
4411 if filtered.is_empty() {
4412 return current_ns.map(|ns| format!("{ns}.{simple}"));
4413 }
4414
4415 if filtered.len() == 1 {
4416 return Some(filtered.pop().expect("one element"));
4417 }
4418
4419 if let Some(ns) = current_ns {
4420 if let Some(hit) = filtered
4421 .iter()
4422 .find(|fqn| fqn.rsplit_once('.').map(|(d, _)| d) == Some(ns))
4423 {
4424 return Some(hit.clone());
4425 }
4426 }
4427
4428 filtered.sort();
4429 Some(filtered[0].clone())
4430}
4431
/// Maps a handful of well-known BCL type names to their `System.*` FQN.
///
/// The match is case-sensitive; unknown names yield `None`.
fn csharp_well_known_static_type(name: &str) -> Option<&'static str> {
    const WELL_KNOWN: [(&str, &str); 5] = [
        ("Console", "System.Console"),
        ("String", "System.String"),
        ("Math", "System.Math"),
        ("Object", "System.Object"),
        ("Environment", "System.Environment"),
    ];
    WELL_KNOWN
        .iter()
        .find(|(short, _)| *short == name)
        .map(|(_, full)| *full)
}
4443
4444fn csharp_node_text(node: Node, source: &str) -> Option<String> {
4445 let s = node.start_byte() as usize;
4446 let e = node.end_byte() as usize;
4447 source.get(s..e).map(str::trim).map(String::from)
4448}
4449
/// Reduces C# type text to the simple name used for index lookups.
///
/// Steps, in order:
/// 1. Drop generic arguments (everything from the first `<`).
/// 2. Drop trailing `?` nullable annotations, so `Foo?` is looked up as `Foo`.
/// 3. Keep only the last `.`-separated segment.
/// 4. Drop an alias qualifier such as `global::`, so `global::Foo` is looked up as `Foo`.
///
/// Array and pointer suffixes (`[]`, `*`) are left untouched.
fn csharp_type_text_to_simple_for_resolve(type_text: &str) -> String {
    let base = type_text.split('<').next().unwrap_or(type_text).trim();
    // A nullable marker is not part of the type name and would never match an index key.
    let base = base.trim_end_matches('?').trim_end();
    let leaf = base.rsplit('.').next().unwrap_or(base);
    // `alias::Name` (typically `global::Name`) refers to `Name`.
    let leaf = leaf.rsplit("::").next().unwrap_or(leaf);
    leaf.trim().to_string()
}
4455
4456fn csharp_resolve_type_node_to_fqn(
4457 type_node: Node,
4458 source: &str,
4459 namespace: Option<&str>,
4460 using_summary: &CSharpUsingSummary,
4461 index: &CSharpBatchIndex,
4462) -> Option<String> {
4463 let raw = csharp_node_text(type_node, source)?;
4464 if raw == "var" || raw.is_empty() {
4465 return None;
4466 }
4467 let simple = csharp_type_text_to_simple_for_resolve(&raw);
4468 if simple.is_empty() {
4469 return None;
4470 }
4471 resolve_csharp_type_fqn(
4472 &simple,
4473 namespace,
4474 &using_summary.namespace_imports,
4475 &using_summary.alias_map,
4476 index,
4477 )
4478}
4479
4480fn csharp_variable_declaration_child(node: Node) -> Option<Node> {
4481 for i in 0..node.child_count() {
4482 if let Some(c) = node.child(i) {
4483 if c.kind() == "variable_declaration" {
4484 return Some(c);
4485 }
4486 }
4487 }
4488 None
4489}
4490
4491fn csharp_collect_variable_declaration_bindings(
4492 var_decl: Node,
4493 source: &str,
4494 namespace: Option<&str>,
4495 using_summary: &CSharpUsingSummary,
4496 index: &CSharpBatchIndex,
4497 out: &mut HashMap<String, String>,
4498) {
4499 let Some(type_node) = var_decl.child_by_field_name("type") else {
4500 return;
4501 };
4502 let Some(type_fqn) =
4503 csharp_resolve_type_node_to_fqn(type_node, source, namespace, using_summary, index)
4504 else {
4505 return;
4506 };
4507 for i in 0..var_decl.child_count() {
4508 let Some(child) = var_decl.child(i) else {
4509 continue;
4510 };
4511 if child.kind() != "variable_declarator" {
4512 continue;
4513 }
4514 let Some(id) = child.child(0).filter(|c| c.kind() == "identifier") else {
4515 continue;
4516 };
4517 if let Some(name) = csharp_node_text(id, source) {
4518 out.insert(name, type_fqn.clone());
4519 }
4520 }
4521}
4522
4523fn csharp_collect_fields_for_type_declaration(
4524 type_decl: Node,
4525 source: &str,
4526 namespace: Option<&str>,
4527 using_summary: &CSharpUsingSummary,
4528 index: &CSharpBatchIndex,
4529) -> HashMap<String, String> {
4530 let mut fields = HashMap::new();
4531 if !matches!(
4532 type_decl.kind(),
4533 "class_declaration"
4534 | "interface_declaration"
4535 | "struct_declaration"
4536 | "enum_declaration"
4537 | "record_declaration"
4538 | "record_struct_declaration"
4539 ) {
4540 return fields;
4541 }
4542 let Some(body) = type_decl.child_by_field_name("body") else {
4543 return fields;
4544 };
4545 for i in 0..body.child_count() {
4546 let Some(member) = body.child(i) else {
4547 continue;
4548 };
4549 if member.kind() != "field_declaration" {
4550 continue;
4551 }
4552 let Some(vd) = csharp_variable_declaration_child(member) else {
4553 continue;
4554 };
4555 csharp_collect_variable_declaration_bindings(
4556 vd,
4557 source,
4558 namespace,
4559 using_summary,
4560 index,
4561 &mut fields,
4562 );
4563 }
4564 fields
4565}
4566
4567fn csharp_collect_locals_in_scope(
4568 scope_root: Node,
4569 source: &str,
4570 namespace: Option<&str>,
4571 using_summary: &CSharpUsingSummary,
4572 index: &CSharpBatchIndex,
4573) -> HashMap<String, String> {
4574 let mut locals = HashMap::new();
4575 walk_tree(scope_root, |n| {
4576 if n.kind() != "local_declaration_statement" {
4577 return;
4578 }
4579 let Some(vd) = csharp_variable_declaration_child(n) else {
4580 return;
4581 };
4582 csharp_collect_variable_declaration_bindings(
4583 vd,
4584 source,
4585 namespace,
4586 using_summary,
4587 index,
4588 &mut locals,
4589 );
4590 });
4591 locals
4592}
4593
/// Maps a C# type declaration node kind to a short kind label.
///
/// Record structs are labelled `struct`; any unrecognised kind is labelled `class`.
fn csharp_type_declaration_kind_str(decl_kind: &str) -> &'static str {
    match decl_kind {
        "struct_declaration" | "record_struct_declaration" => "struct",
        "record_declaration" => "record",
        "interface_declaration" => "interface",
        "enum_declaration" => "enum",
        _ => "class",
    }
}
4604
4605fn csharp_fqn_for_type_declaration(
4609 decl: Node,
4610 source: &str,
4611 legacy_file_namespace: Option<&str>,
4612) -> Option<String> {
4613 let leaf_name = identifier_text_from_children(decl, source)?;
4614 let mut segments = vec![leaf_name];
4615 let mut cur = decl.parent();
4616 while let Some(p) = cur {
4617 let pk = p.kind();
4618 if matches!(
4619 pk,
4620 "class_declaration"
4621 | "interface_declaration"
4622 | "struct_declaration"
4623 | "enum_declaration"
4624 | "record_declaration"
4625 | "record_struct_declaration"
4626 ) {
4627 if let Some(n) = identifier_text_from_children(p, source) {
4628 segments.push(n);
4629 }
4630 }
4631 cur = p.parent();
4632 }
4633 segments.reverse();
4634 let qualified = segments.join(".");
4635 let ns = csharp_enclosing_namespace_prefix(decl, source)
4636 .or_else(|| legacy_file_namespace.map(|s| s.to_string()));
4637 Some(match ns {
4638 Some(n) => format!("{n}.{qualified}"),
4639 None => qualified,
4640 })
4641}
4642
4643fn csharp_innermost_enclosing_type_declaration(from: Node) -> Option<Node> {
4644 let mut cur = from.parent();
4645 while let Some(p) = cur {
4646 let pk = p.kind();
4647 if matches!(
4648 pk,
4649 "class_declaration"
4650 | "interface_declaration"
4651 | "struct_declaration"
4652 | "enum_declaration"
4653 | "record_declaration"
4654 | "record_struct_declaration"
4655 ) {
4656 return Some(p);
4657 }
4658 cur = p.parent();
4659 }
4660 None
4661}
4662
/// Builds the FQN of an instance constructor: `<class_fqn>.ctor#<arity>`.
fn csharp_constructor_function_fqn(class_fqn: &str, arity: usize) -> String {
    let arity_text = arity.to_string();
    let mut fqn = String::with_capacity(class_fqn.len() + ".ctor#".len() + arity_text.len());
    fqn.push_str(class_fqn);
    fqn.push_str(".ctor#");
    fqn.push_str(&arity_text);
    fqn
}
4667
4668fn csharp_constructor_symbol_fqn(ctor: Node, class_fqn: &str, source: &str) -> (String, String) {
4670 let modifiers = csharp_collect_method_modifiers(ctor, source);
4671 if modifiers.iter().any(|m| m == "static") {
4672 return ("cctor".to_string(), format!("{class_fqn}.cctor"));
4673 }
4674 let param_types = csharp_method_parameter_types(ctor, source);
4675 let arity = param_types.len();
4676 (
4677 format!("ctor#{arity}"),
4678 csharp_constructor_function_fqn(class_fqn, arity),
4679 )
4680}
4681
4682fn csharp_method_simple_name(method: Node, source: &str) -> Option<String> {
4683 method
4684 .child_by_field_name("name")
4685 .and_then(|n| csharp_node_text(n, source))
4686 .filter(|s| !s.is_empty())
4687 .or_else(|| identifier_text_from_children(method, source))
4688}
4689
4690fn csharp_collect_method_modifiers(method: Node, source: &str) -> Vec<String> {
4691 let mut out = Vec::new();
4692 for i in 0..method.child_count() {
4693 let Some(c) = method.child(i) else {
4694 continue;
4695 };
4696 if c.kind() == "modifier" {
4697 if let Some(t) = csharp_node_text(c, source) {
4698 if !t.is_empty() {
4699 out.push(t);
4700 }
4701 }
4702 }
4703 }
4704 out
4705}
4706
4707fn csharp_method_return_type_node(method: Node) -> Option<Node> {
4709 method
4710 .child_by_field_name("returns")
4711 .or_else(|| method.child_by_field_name("type"))
4712}
4713
4714fn csharp_method_return_type_string(method: Node, source: &str) -> Option<String> {
4715 let t = csharp_method_return_type_node(method)?;
4716 if t.kind() == "void_keyword" {
4717 return Some("void".to_string());
4718 }
4719 csharp_node_text(t, source)
4720}
4721
4722fn csharp_method_parameter_types(method: Node, source: &str) -> Vec<String> {
4723 let mut out = Vec::new();
4724 let Some(params) = method.child_by_field_name("parameters") else {
4725 return out;
4726 };
4727 for i in 0..params.child_count() {
4728 let Some(p) = params.child(i) else {
4729 continue;
4730 };
4731 if p.kind() != "parameter" {
4732 continue;
4733 }
4734 let Some(ty) = p.child_by_field_name("type") else {
4735 continue;
4736 };
4737 if let Some(s) = csharp_node_text(ty, source) {
4738 if !s.is_empty() {
4739 out.push(s);
4740 }
4741 }
4742 }
4743 out
4744}
4745
4746fn csharp_enclosing_type_fqn(node: Node, source: &str, namespace: Option<&str>) -> Option<String> {
4747 let inner = csharp_innermost_enclosing_type_declaration(node)?;
4748 csharp_fqn_for_type_declaration(inner, source, namespace)
4749}
4750
4751fn csharp_block_body(node: Node) -> Option<Node> {
4752 for i in 0..node.child_count() {
4753 if let Some(c) = node.child(i) {
4754 if c.kind() == "block" {
4755 return Some(c);
4756 }
4757 }
4758 }
4759 None
4760}
4761
4762fn csharp_property_name_for_accessor(acc: Node, source: &str) -> Option<String> {
4763 let list = acc.parent()?;
4764 let prop = list.parent()?;
4765 if prop.kind() != "property_declaration" {
4766 return None;
4767 }
4768 let name_node = prop.child_by_field_name("name")?;
4769 csharp_node_text(name_node, source)
4770}
4771
4772fn csharp_accessor_kind_prefix(acc: Node, source: &str) -> &'static str {
4773 for i in 0..acc.child_count() {
4774 if let Some(c) = acc.child(i) {
4775 if let Some(t) = csharp_node_text(c, source) {
4776 match t.as_str() {
4777 "get" => return "get",
4778 "set" => return "set",
4779 "init" => return "init",
4780 _ => {}
4781 }
4782 }
4783 }
4784 }
4785 "accessor"
4786}
4787
4788fn csharp_method_name_from_simple_name_node(node: Node, source: &str) -> Option<String> {
4789 match node.kind() {
4790 "identifier" => csharp_node_text(node, source),
4791 "generic_name" => node
4792 .child(0)
4793 .filter(|c| c.kind() == "identifier")
4794 .and_then(|c| csharp_node_text(c, source)),
4795 _ => {
4796 let t = csharp_node_text(node, source)?;
4797 Some(csharp_type_text_to_simple_for_resolve(&t))
4798 }
4799 }
4800}
4801
4802fn csharp_unwrap_parenthesized_invoke_expr(mut expr: Node) -> Node {
4803 while expr.kind() == "parenthesized_expression" {
4804 if let Some(inner) = expr.child(1) {
4805 expr = inner;
4806 } else {
4807 break;
4808 }
4809 }
4810 expr
4811}
4812
4813fn csharp_resolve_member_receiver_to_type_fqn(
4814 mut receiver: Node,
4815 source: &str,
4816 class_fqn: Option<&str>,
4817 field_map: &HashMap<String, String>,
4818 local_map: &HashMap<String, String>,
4819 namespace: Option<&str>,
4820 using_summary: &CSharpUsingSummary,
4821 index: &CSharpBatchIndex,
4822) -> Option<String> {
4823 while receiver.kind() == "member_access_expression" {
4824 receiver = receiver.child_by_field_name("expression")?;
4825 }
4826 match receiver.kind() {
4827 "this_expression" | "base_expression" | "this" | "base" => class_fqn.map(String::from),
4829 "identifier" => {
4830 let name = csharp_node_text(receiver, source)?;
4831 if let Some(t) = local_map.get(&name) {
4832 return Some(t.clone());
4833 }
4834 if let Some(t) = field_map.get(&name) {
4835 return Some(t.clone());
4836 }
4837 if let Some(wk) = csharp_well_known_static_type(&name) {
4838 return Some(wk.to_string());
4839 }
4840 resolve_csharp_type_fqn(
4841 &name,
4842 namespace,
4843 &using_summary.namespace_imports,
4844 &using_summary.alias_map,
4845 index,
4846 )
4847 }
4848 _ => None,
4849 }
4850}
4851
4852fn csharp_resolve_invocation_callee_fqn(
4853 fn_expr: Node,
4854 source: &str,
4855 class_fqn: Option<&str>,
4856 field_map: &HashMap<String, String>,
4857 local_map: &HashMap<String, String>,
4858 namespace: Option<&str>,
4859 using_summary: &CSharpUsingSummary,
4860 index: &CSharpBatchIndex,
4861) -> Option<String> {
4862 let fn_expr = csharp_unwrap_parenthesized_invoke_expr(fn_expr);
4863 match fn_expr.kind() {
4864 "identifier" => {
4865 let name = csharp_node_text(fn_expr, source)?;
4866 if let Some(cls) = class_fqn {
4867 Some(format!("{cls}.{name}"))
4868 } else if let Some(ns) = namespace {
4869 Some(format!("{ns}.{name}"))
4870 } else {
4871 Some(name)
4872 }
4873 }
4874 "generic_name" => {
4875 let name = csharp_method_name_from_simple_name_node(fn_expr, source)?;
4876 if let Some(cls) = class_fqn {
4877 Some(format!("{cls}.{name}"))
4878 } else if let Some(ns) = namespace {
4879 Some(format!("{ns}.{name}"))
4880 } else {
4881 Some(name)
4882 }
4883 }
4884 "member_access_expression" => {
4885 let name_node = fn_expr.child_by_field_name("name")?;
4886 let method_name = csharp_method_name_from_simple_name_node(name_node, source)?;
4887 let recv = fn_expr.child_by_field_name("expression")?;
4888 let recv_ty = csharp_resolve_member_receiver_to_type_fqn(
4889 recv,
4890 source,
4891 class_fqn,
4892 field_map,
4893 local_map,
4894 namespace,
4895 using_summary,
4896 index,
4897 )?;
4898 Some(format!("{recv_ty}.{method_name}"))
4899 }
4900 _ => None,
4901 }
4902}
4903
4904fn csharp_collect_calls_from_body(
4905 body: Node,
4906 caller_fqn: &str,
4907 class_fqn: Option<&str>,
4908 field_map: &HashMap<String, String>,
4909 source: &str,
4910 namespace: Option<&str>,
4911 using_summary: &CSharpUsingSummary,
4912 index: &CSharpBatchIndex,
4913 calls: &mut Vec<(String, String)>,
4914) {
4915 let locals = csharp_collect_locals_in_scope(body, source, namespace, using_summary, index);
4916 walk_tree(body, |n| {
4917 if n.kind() != "invocation_expression" {
4918 return;
4919 }
4920 let Some(fn_node) = n.child_by_field_name("function") else {
4921 return;
4922 };
4923 let Some(callee) = csharp_resolve_invocation_callee_fqn(
4924 fn_node,
4925 source,
4926 class_fqn,
4927 field_map,
4928 &locals,
4929 namespace,
4930 using_summary,
4931 index,
4932 ) else {
4933 return;
4934 };
4935 calls.push((caller_fqn.to_string(), callee));
4936 });
4937}
4938
/// Walks a parsed C# file and collects its type, function and property symbols.
///
/// Returns `(classes, methods, properties)`:
///
/// * `classes` — one entry per distinct type-declaration FQN (class, interface, struct, enum,
///   record, record struct); repeated FQNs are recorded only once.
/// * `methods` — one entry per method declaration, constructor declaration and property
///   accessor. Accessors are named `<prefix>_<property>` using `csharp_accessor_kind_prefix`.
///   Function entries are not de-duplicated.
/// * `properties` — one entry per distinct property FQN (`<class_fqn>.<name>`).
///
/// The namespace returned by `extract_csharp_namespace` is passed on as the fallback namespace
/// when FQNs are computed.
fn extract_csharp_symbols(
    tree: &Tree,
    source: &str,
) -> (
    Vec<ClassSymbol>,
    Vec<FunctionSymbol>,
    Vec<PropertySymbol>,
) {
    let mut classes: Vec<ClassSymbol> = Vec::new();
    let mut methods: Vec<FunctionSymbol> = Vec::new();
    let mut properties: Vec<PropertySymbol> = Vec::new();
    // FQNs already emitted, so a repeated declaration yields a single symbol.
    let mut seen_class_fqns: HashSet<String> = HashSet::new();
    let mut seen_property_fqns: HashSet<String> = HashSet::new();

    let namespace = extract_csharp_namespace(tree, source);
    let root = tree.root_node();

    walk_tree(root, |node| {
        let nk = node.kind();
        match nk {
            "class_declaration"
            | "interface_declaration"
            | "struct_declaration"
            | "enum_declaration"
            | "record_declaration"
            | "record_struct_declaration" => {
                let Some(name) = identifier_text_from_children(node, source) else {
                    return;
                };
                let Some(fqn) = csharp_fqn_for_type_declaration(node, source, namespace.as_deref()) else {
                    return;
                };
                if seen_class_fqns.insert(fqn.clone()) {
                    let kind = Some(csharp_type_declaration_kind_str(nk));
                    classes.push(ClassSymbol { name, fqn, kind });
                }
            }
            "method_declaration" => {
                let Some(method_name) = csharp_method_simple_name(node, source) else {
                    return;
                };
                let class_fqn = csharp_enclosing_type_fqn(node, source, namespace.as_deref());
                // Qualify with the enclosing type, else the file namespace, else leave bare.
                let fqn = if let Some(ref cls) = class_fqn {
                    format!("{cls}.{method_name}")
                } else if let Some(ref ns) = namespace {
                    format!("{ns}.{method_name}")
                } else {
                    method_name.clone()
                };

                let param_types = csharp_method_parameter_types(node, source);
                let param_count = param_types.len();
                let return_type = csharp_method_return_type_string(node, source);
                let modifiers = csharp_collect_method_modifiers(node, source);

                methods.push(FunctionSymbol {
                    name: method_name,
                    fqn,
                    class_fqn,
                    return_type,
                    param_types,
                    param_count,
                    modifiers,
                    is_pointer_receiver: None,
                });
            }
            "constructor_declaration" => {
                // Constructors outside any type declaration are ignored.
                let Some(class_fqn) =
                    csharp_enclosing_type_fqn(node, source, namespace.as_deref())
                else {
                    return;
                };
                let (name, fqn) = csharp_constructor_symbol_fqn(node, &class_fqn, source);
                let param_types = csharp_method_parameter_types(node, source);
                let param_count = param_types.len();
                let modifiers = csharp_collect_method_modifiers(node, source);
                methods.push(FunctionSymbol {
                    name,
                    fqn,
                    class_fqn: Some(class_fqn),
                    return_type: None,
                    param_types,
                    param_count,
                    modifiers,
                    is_pointer_receiver: None,
                });
            }
            "property_declaration" => {
                let Some(class_fqn) =
                    csharp_enclosing_type_fqn(node, source, namespace.as_deref())
                else {
                    return;
                };
                let Some(prop_name_node) = node.child_by_field_name("name") else {
                    return;
                };
                let Some(prop_name) = csharp_node_text(prop_name_node, source).filter(|s| !s.is_empty())
                else {
                    return;
                };
                let prop_fqn = format!("{class_fqn}.{prop_name}");
                if seen_property_fqns.insert(prop_fqn.clone()) {
                    let declared_type = node
                        .child_by_field_name("type")
                        .and_then(|t| csharp_node_text(t, source))
                        .filter(|s| !s.is_empty());
                    properties.push(PropertySymbol {
                        class_fqn: class_fqn.clone(),
                        name: prop_name.clone(),
                        fqn: prop_fqn,
                        declared_type,
                    });
                }
                // Same lookup as `declared_type` above; it is needed again here because accessor
                // functions are emitted even when the property symbol was already recorded.
                let prop_type_text = node
                    .child_by_field_name("type")
                    .and_then(|t| csharp_node_text(t, source))
                    .filter(|s| !s.is_empty());
                // Properties without an `accessors` field produce no accessor functions.
                let Some(accessor_list) = node.child_by_field_name("accessors") else {
                    return;
                };
                for i in 0..accessor_list.child_count() {
                    let Some(acc) = accessor_list.child(i) else {
                        continue;
                    };
                    if acc.kind() != "accessor_declaration" {
                        continue;
                    }
                    let prefix = csharp_accessor_kind_prefix(acc, source);
                    let fn_name = format!("{prefix}_{prop_name}");
                    let fqn = format!("{class_fqn}.{fn_name}");
                    // Getters (and unrecognised accessors) report the property type;
                    // setters and init accessors report `void`.
                    let return_type = match prefix {
                        "get" => prop_type_text.clone(),
                        "set" | "init" => Some("void".to_string()),
                        _ => prop_type_text.clone(),
                    };
                    let param_types = csharp_method_parameter_types(acc, source);
                    let param_count = param_types.len();
                    let modifiers = csharp_collect_method_modifiers(acc, source);
                    methods.push(FunctionSymbol {
                        name: fn_name,
                        fqn,
                        class_fqn: Some(class_fqn.clone()),
                        return_type,
                        param_types,
                        param_count,
                        modifiers,
                        is_pointer_receiver: None,
                    });
                }
            }
            _ => {}
        }
    });

    (classes, methods, properties)
}
5100
/// Invokes `f` once for each distinct type-syntax root referenced by `method`.
///
/// Roots are the method's return type plus the type nodes found under declarations,
/// parameters, object creations, casts, `default(...)`, `as` / `is` expressions and type
/// patterns, as visited by `walk_tree_skip_local_functions`. A root is identified by its
/// `(start_byte, end_byte)` span; spans already present in `seen` are skipped and new ones are
/// added to it. `void_keyword` nodes are never reported.
fn csharp_for_each_unique_type_root(method: Node, seen: &mut HashSet<(usize, usize)>, f: &mut impl FnMut(Node)) {
    // Reports a candidate root unless it is missing, `void`, or already seen.
    let mut push = |n: Option<Node>| {
        let Some(t) = n else {
            return;
        };
        if t.kind() == "void_keyword" {
            return;
        }
        let key = (t.start_byte() as usize, t.end_byte() as usize);
        if seen.insert(key) {
            f(t);
        }
    };

    push(csharp_method_return_type_node(method));

    walk_tree_skip_local_functions(method, |n| {
        match n.kind() {
            "local_declaration_statement" => {
                if let Some(vd) = csharp_variable_declaration_child(n) {
                    push(vd.child_by_field_name("type"));
                }
            }
            "variable_declaration" | "parameter" | "object_creation_expression" | "cast_expression"
            | "default_expression" => {
                push(n.child_by_field_name("type"));
            }
            "as_expression" | "is_expression" => {
                // The tested/target type is read from the `right` field.
                push(n.child_by_field_name("right"));
            }
            "type_pattern" => {
                if let Some(c) = n.named_child(0) {
                    push(Some(c));
                }
            }
            _ => {}
        }
    });
}
5142
5143fn csharp_collect_simple_type_names_from_type_node(node: Node, source: &str, out: &mut Vec<String>) {
5145 match node.kind() {
5146 "type_identifier" | "identifier" => {
5147 if let Some(t) = csharp_node_text(node, source) {
5148 out.push(t);
5149 }
5150 }
5151 "generic_name" => {
5152 for i in 0..node.child_count() {
5153 let Some(c) = node.child(i) else {
5154 continue;
5155 };
5156 match c.kind() {
5157 "identifier" => {
5158 if let Some(t) = csharp_node_text(c, source) {
5159 out.push(t);
5160 }
5161 }
5162 "type_argument_list" => {
5163 for j in 0..c.named_child_count() {
5164 if let Some(arg) = c.named_child(j) {
5165 csharp_collect_simple_type_names_from_type_node(arg, source, out);
5166 }
5167 }
5168 }
5169 _ => {}
5170 }
5171 }
5172 }
5173 "qualified_name" => {
5174 for i in 0..node.child_count() {
5175 let Some(c) = node.child(i) else {
5176 continue;
5177 };
5178 match c.kind() {
5179 "qualified_name" | "alias_qualified_name" | "generic_name" | "type_identifier"
5180 | "identifier" => {
5181 csharp_collect_simple_type_names_from_type_node(c, source, out);
5182 }
5183 _ => {}
5184 }
5185 }
5186 }
5187 "alias_qualified_name" => {
5188 for i in 0..node.child_count() {
5189 let Some(c) = node.child(i) else {
5190 continue;
5191 };
5192 if matches!(c.kind(), "generic_name" | "identifier" | "type_identifier") {
5193 csharp_collect_simple_type_names_from_type_node(c, source, out);
5194 }
5195 }
5196 }
5197 "nullable_type" => {
5198 if let Some(c) = node.named_child(0) {
5199 csharp_collect_simple_type_names_from_type_node(c, source, out);
5200 }
5201 }
5202 "array_type" => {
5203 if let Some(c) = node.child_by_field_name("type") {
5204 csharp_collect_simple_type_names_from_type_node(c, source, out);
5205 }
5206 }
5207 "pointer_type" => {
5208 if let Some(c) = node.named_child(0) {
5209 csharp_collect_simple_type_names_from_type_node(c, source, out);
5210 }
5211 }
5212 "tuple_type" => {
5213 for i in 0..node.named_child_count() {
5214 let Some(el) = node.named_child(i) else {
5215 continue;
5216 };
5217 if el.kind() == "tuple_element" {
5218 if let Some(tn) = el.child_by_field_name("type") {
5219 csharp_collect_simple_type_names_from_type_node(tn, source, out);
5220 }
5221 }
5222 }
5223 }
5224 "predefined_type" => {}
5225 _ => {
5226 if let Some(inner) = node.child_by_field_name("type") {
5227 csharp_collect_simple_type_names_from_type_node(inner, source, out);
5228 }
5229 }
5230 }
5231}
5232
5233fn csharp_for_each_base_type_in_list(base_list: Node, mut f: impl FnMut(Node)) {
5234 for i in 0..base_list.child_count() {
5235 let Some(c) = base_list.child(i) else {
5236 continue;
5237 };
5238 match c.kind() {
5239 ":" | "," => continue,
5240 "argument_list" => continue,
5242 _ => f(c),
5243 }
5244 }
5245}
5246
5247fn csharp_type_declaration_base_list(decl: Node) -> Option<Node> {
5249 decl.child_by_field_name("bases")
5250 .filter(|b| !b.is_missing())
5251 .or_else(|| {
5252 (0..decl.named_child_count())
5253 .filter_map(|i| decl.named_child(i))
5254 .find(|c| c.kind() == "base_list")
5255 })
5256}
5257
5258fn extract_csharp_class_inheritance_edges(
5260 tree: &Tree,
5261 source: &str,
5262 legacy_ns: Option<&str>,
5263 using_summary: &CSharpUsingSummary,
5264 index: &CSharpBatchIndex,
5265) -> Vec<(String, String)> {
5266 let mut out = Vec::new();
5267 let mut seen: HashSet<(String, String)> = HashSet::new();
5268 let root = tree.root_node();
5269 walk_tree(root, |n| {
5270 if !matches!(
5271 n.kind(),
5272 "class_declaration" | "interface_declaration" | "struct_declaration" | "record_declaration"
5273 ) {
5274 return;
5275 }
5276 let Some(derived_fqn) = csharp_fqn_for_type_declaration(n, source, legacy_ns) else {
5277 return;
5278 };
5279 let Some(bases_field) = csharp_type_declaration_base_list(n) else {
5280 return;
5281 };
5282 if bases_field.is_missing() {
5283 return;
5284 }
5285 let ns_for = csharp_enclosing_namespace_prefix(n, source)
5286 .or_else(|| legacy_ns.map(|s| s.to_string()));
5287 csharp_for_each_base_type_in_list(bases_field, |ty_node| {
5288 let mut names: Vec<String> = Vec::new();
5289 csharp_collect_simple_type_names_from_type_node(ty_node, source, &mut names);
5290 let Some(simple) = names.first() else {
5291 return;
5292 };
5293 if is_csharp_builtin_type(simple) {
5294 return;
5295 }
5296 let base_fqn = resolve_csharp_type_fqn(
5297 simple,
5298 ns_for.as_deref(),
5299 &using_summary.namespace_imports,
5300 &using_summary.alias_map,
5301 index,
5302 )
5303 .unwrap_or_else(|| {
5304 ns_for
5305 .as_ref()
5306 .map(|ns| format!("{ns}.{simple}"))
5307 .unwrap_or_else(|| simple.clone())
5308 });
5309 if base_fqn != derived_fqn && seen.insert((derived_fqn.clone(), base_fqn.clone())) {
5310 out.push((derived_fqn.clone(), base_fqn));
5311 }
5312 });
5313 });
5314 out
5315}
5316
/// Heuristic for generic type parameters: true only for a name consisting of exactly one ASCII
/// uppercase letter (e.g. `T`).
fn csharp_is_likely_type_parameter_name(name: &str) -> bool {
    let mut chars = name.chars();
    let first = chars.next();
    let second = chars.next();
    matches!((first, second), (Some(only), None) if only.is_ascii_uppercase())
}
5325
5326fn extract_csharp_used_classes(
5329 tree: &Tree,
5330 source: &str,
5331 namespace: Option<&str>,
5332 using_summary: &CSharpUsingSummary,
5333 csharp_index: &CSharpBatchIndex,
5334) -> Vec<(String, String)> {
5335 let mut uses: Vec<(String, String)> = Vec::new();
5336 let mut pair_seen: HashSet<(String, String)> = HashSet::new();
5337 let root = tree.root_node();
5338
5339 walk_tree(root, |node| {
5340 if node.kind() != "method_declaration" {
5341 return;
5342 }
5343
5344 let Some(method_name) = csharp_method_simple_name(node, source) else {
5345 return;
5346 };
5347
5348 let effective_ns = csharp_enclosing_namespace_prefix(node, source)
5349 .or_else(|| namespace.map(|s| s.to_string()));
5350 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
5351 let func_fqn = if let Some(ref cls) = class_fqn {
5352 format!("{cls}.{method_name}")
5353 } else if let Some(ref ns) = effective_ns {
5354 format!("{ns}.{method_name}")
5355 } else {
5356 method_name
5357 };
5358
5359 let mut root_seen: HashSet<(usize, usize)> = HashSet::new();
5360 let mut simple_names: Vec<String> = Vec::new();
5361 csharp_for_each_unique_type_root(node, &mut root_seen, &mut |tr| {
5362 csharp_collect_simple_type_names_from_type_node(tr, source, &mut simple_names);
5363 });
5364
5365 for type_name in simple_names {
5366 if is_csharp_builtin_type(&type_name) {
5367 continue;
5368 }
5369 if csharp_is_likely_type_parameter_name(&type_name)
5370 && !using_summary.alias_map.contains_key(&type_name)
5371 {
5372 continue;
5373 }
5374 if !type_name
5375 .chars()
5376 .next()
5377 .map(|c| c.is_uppercase())
5378 .unwrap_or(false)
5379 {
5380 continue;
5381 }
5382 let type_fqn = resolve_csharp_type_fqn(
5383 &type_name,
5384 effective_ns.as_deref(),
5385 &using_summary.namespace_imports,
5386 &using_summary.alias_map,
5387 csharp_index,
5388 )
5389 .unwrap_or_else(|| {
5390 effective_ns
5391 .as_ref()
5392 .map(|ns| format!("{ns}.{type_name}"))
5393 .unwrap_or_else(|| type_name.clone())
5394 });
5395 if pair_seen.insert((func_fqn.clone(), type_fqn.clone())) {
5396 uses.push((func_fqn.clone(), type_fqn));
5397 }
5398 }
5399 });
5400
5401 uses
5402}
5403
/// Returns `true` when `name` is a C# built-in type keyword (or the contextual `var` /
/// `dynamic`), or the simple CLR name of the type such a keyword aliases.
///
/// The unsigned/signed-byte keywords (`uint`, `ulong`, `ushort`, `sbyte`) and the CLR names of
/// the native-sized integers (`IntPtr`, `UIntPtr`) are included so that every keyword and its
/// CLR counterpart are classified consistently.
fn is_csharp_primitive_or_alias(name: &str) -> bool {
    matches!(
        name,
        // Language keywords and contextual keywords.
        "int" | "long" | "short" | "byte" | "float" | "double" | "decimal"
            | "uint" | "ulong" | "ushort" | "sbyte"
            | "bool" | "char" | "string" | "object" | "void" | "dynamic" | "var"
            | "nint" | "nuint"
            // CLR type names behind those keywords.
            | "Int32" | "Int64" | "Int16" | "UInt32" | "UInt64" | "UInt16"
            | "Byte" | "SByte" | "Single" | "Double" | "Decimal"
            | "Boolean" | "Char" | "String" | "Object" | "Void"
            | "IntPtr" | "UIntPtr"
    )
}
5416
/// Returns `true` when `name` is one of a fixed set of simple type names treated as common
/// BCL / framework types (collections, tasks, delegates, date-time, text, I/O, core utilities,
/// exceptions, spans, core interfaces, hosting and HTTP).
///
/// Matching is exact and case-sensitive.
fn is_csharp_common_bcl_or_framework_type(name: &str) -> bool {
    const COMMON_TYPE_NAMES: &[&str] = &[
        // Collections
        "List", "Dictionary", "IEnumerable", "IEnumerator", "IList", "ICollection", "IDictionary",
        "IReadOnlyList", "IReadOnlyCollection", "IReadOnlyDictionary", "ISet", "HashSet",
        "SortedSet", "Queue", "Stack", "LinkedList", "SortedList", "ConcurrentBag",
        "ConcurrentQueue", "ConcurrentStack", "ConcurrentDictionary", "ObservableCollection",
        "ImmutableArray", "ImmutableList", "ImmutableDictionary", "IOrderedEnumerable",
        // Tasks and threading
        "Task", "ValueTask", "CancellationToken", "CancellationTokenSource",
        "IAsyncEnumerable", "IAsyncEnumerator", "Parallel", "Thread", "Interlocked",
        // Delegates
        "Func", "Action", "MulticastDelegate", "Delegate",
        // Date, time and identifiers
        "DateTime", "DateTimeOffset", "TimeSpan", "DateOnly", "TimeOnly", "Guid",
        // Text
        "StringBuilder", "Encoding", "UTF8Encoding", "ASCIIEncoding", "UnicodeEncoding",
        // I/O
        "Stream", "MemoryStream", "FileStream", "BufferedStream", "TextReader", "TextWriter",
        "StringReader", "StringWriter", "BinaryReader", "BinaryWriter", "File", "Path",
        "Directory", "Environment",
        // Core utilities
        "Uri", "Version", "Type", "Enum", "Array", "Nullable", "Lazy", "Tuple",
        "ValueTuple", "Console", "Math", "Convert", "BitConverter", "GC", "WeakReference",
        "RuntimeHelpers", "Activator",
        // Exceptions
        "Exception", "ArgumentException", "ArgumentNullException", "ArgumentOutOfRangeException",
        "InvalidOperationException", "NotSupportedException", "NotImplementedException",
        "IOException", "UnauthorizedAccessException", "TimeoutException", "AggregateException",
        "OperationCanceledException", "ObjectDisposedException", "FormatException",
        // Spans and memory
        "Span", "ReadOnlySpan", "Memory", "ReadOnlyMemory",
        // Core interfaces
        "IDisposable", "IAsyncDisposable", "IComparable", "IEquatable", "IFormattable",
        // Hosting and HTTP
        "IServiceProvider", "ILogger", "IConfiguration", "IHost", "IHostedService",
        "IHttpClientFactory", "HttpClient", "HttpRequestMessage", "HttpResponseMessage",
    ];

    COMMON_TYPE_NAMES.iter().any(|known| *known == name)
}
5456
5457fn is_csharp_builtin_type(name: &str) -> bool {
5458 is_csharp_primitive_or_alias(name) || is_csharp_common_bcl_or_framework_type(name)
5459}
5460
/// Extracts `(caller_fqn, callee_fqn)` call edges from a C# file.
///
/// Works in two passes over the tree:
///
/// 1. For every type declaration, build a field map via
///    `csharp_collect_fields_for_type_declaration`, keyed by the type's FQN.
/// 2. For every method, constructor and property accessor that has a direct `block` child,
///    collect the invocations in that block with `csharp_collect_calls_from_body`, using the
///    enclosing type's field map for receiver resolution.
///
/// Members without a direct `block` child are skipped. Edges are not de-duplicated.
fn extract_csharp_calls(
    tree: &Tree,
    source: &str,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    csharp_index: &CSharpBatchIndex,
) -> Vec<(String, String)> {
    let mut calls: Vec<(String, String)> = Vec::new();
    let root = tree.root_node();
    // Type FQN -> field map for that type, as produced by the field collector.
    let mut class_field_maps: HashMap<String, HashMap<String, String>> = HashMap::new();

    // Pass 1: collect the field maps of every type declared in the file.
    walk_tree(root, |node| {
        if matches!(
            node.kind(),
            "class_declaration"
                | "interface_declaration"
                | "struct_declaration"
                | "enum_declaration"
                | "record_declaration"
                | "record_struct_declaration"
        ) {
            if let Some(cfqn) = csharp_fqn_for_type_declaration(node, source, namespace) {
                let ns_for = csharp_enclosing_namespace_prefix(node, source)
                    .or_else(|| namespace.map(|s| s.to_string()));
                let fm = csharp_collect_fields_for_type_declaration(
                    node,
                    source,
                    ns_for.as_deref(),
                    using_summary,
                    csharp_index,
                );
                // A later declaration with the same FQN replaces the earlier map.
                class_field_maps.insert(cfqn, fm);
            }
        }
    });

    // Shared fallback for members whose enclosing type has no recorded field map.
    let empty_fields: HashMap<String, String> = HashMap::new();

    // Pass 2: collect calls from each callable body.
    walk_tree(root, |node| {
        match node.kind() {
            "method_declaration" => {
                let Some(method_name) = csharp_method_simple_name(node, source) else {
                    return;
                };
                let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                    .or_else(|| namespace.map(|s| s.to_string()));
                let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
                // Qualify with the enclosing type, else the namespace, else leave bare.
                let caller_fqn = match &class_fqn {
                    Some(cf) => format!("{cf}.{method_name}"),
                    None => effective_ns
                        .as_ref()
                        .map(|ns| format!("{ns}.{method_name}"))
                        .unwrap_or(method_name),
                };
                let Some(body) = csharp_block_body(node) else {
                    return;
                };
                let field_map = class_fqn
                    .as_ref()
                    .and_then(|c| class_field_maps.get(c))
                    .unwrap_or(&empty_fields);
                csharp_collect_calls_from_body(
                    body,
                    &caller_fqn,
                    class_fqn.as_deref(),
                    field_map,
                    source,
                    effective_ns.as_deref(),
                    using_summary,
                    csharp_index,
                    &mut calls,
                );
            }
            "constructor_declaration" => {
                // Constructors outside any type declaration are ignored.
                let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
                let Some(cf) = class_fqn.clone() else {
                    return;
                };
                let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                    .or_else(|| namespace.map(|s| s.to_string()));
                // Only the FQN half of the constructor symbol is needed as the caller.
                let (_, caller_fqn) = csharp_constructor_symbol_fqn(node, &cf, source);
                let Some(body) = csharp_block_body(node) else {
                    return;
                };
                let field_map = class_field_maps.get(&cf).unwrap_or(&empty_fields);
                csharp_collect_calls_from_body(
                    body,
                    &caller_fqn,
                    Some(cf.as_str()),
                    field_map,
                    source,
                    effective_ns.as_deref(),
                    using_summary,
                    csharp_index,
                    &mut calls,
                );
            }
            "accessor_declaration" => {
                let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
                let Some(cf) = class_fqn.clone() else {
                    return;
                };
                let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                    .or_else(|| namespace.map(|s| s.to_string()));
                // Accessors that do not belong to a property declaration are skipped.
                let Some(prop_name) = csharp_property_name_for_accessor(node, source) else {
                    return;
                };
                let prefix = csharp_accessor_kind_prefix(node, source);
                // Caller FQN has the shape `<class>.<prefix>_<property>`.
                let caller_fqn = format!("{cf}.{}_{}", prefix, prop_name);
                let Some(body) = csharp_block_body(node) else {
                    return;
                };
                let field_map = class_field_maps.get(&cf).unwrap_or(&empty_fields);
                csharp_collect_calls_from_body(
                    body,
                    &caller_fqn,
                    Some(cf.as_str()),
                    field_map,
                    source,
                    effective_ns.as_deref(),
                    using_summary,
                    csharp_index,
                    &mut calls,
                );
            }
            _ => {}
        }
    });

    calls
}
5593
5594fn csharp_string_value_from_literal_node(node: Node, source: &str) -> Option<String> {
5596 let s = source.get(node.start_byte() as usize..node.end_byte() as usize)?;
5597 let t = s.trim();
5598 match node.kind() {
5599 "string_literal" => {
5600 if t.starts_with('"') && t.ends_with('"') && t.len() >= 2 {
5601 Some(t[1..t.len() - 1].to_string())
5602 } else {
5603 None
5604 }
5605 }
5606 "verbatim_string_literal" => {
5607 if t.starts_with("@\"") && t.ends_with('"') && t.len() >= 3 {
5608 Some(t[2..t.len() - 1].replace("\"\"", "\""))
5609 } else {
5610 None
5611 }
5612 }
5613 _ => None,
5614 }
5615}
5616
5617fn csharp_first_string_in_subtree(root: Node, source: &str) -> Option<String> {
5619 let mut stack = vec![root];
5620 while let Some(n) = stack.pop() {
5621 match n.kind() {
5622 "string_literal" | "verbatim_string_literal" => {
5623 return csharp_string_value_from_literal_node(n, source);
5624 }
5625 _ => {
5626 let cc = n.child_count();
5627 for i in (0..cc).rev() {
5628 if let Some(c) = n.child(i) {
5629 stack.push(c);
5630 }
5631 }
5632 }
5633 }
5634 }
5635 None
5636}
5637
5638fn csharp_first_string_in_attribute(attr: Node, source: &str) -> Option<String> {
5639 for i in 0..attr.child_count() {
5640 let Some(c) = attr.child(i) else {
5641 continue;
5642 };
5643 if c.kind() != "attribute_argument_list" {
5644 continue;
5645 }
5646 for j in 0..c.named_child_count() {
5647 let Some(arg) = c.named_child(j) else {
5648 continue;
5649 };
5650 if arg.kind() == "attribute_argument" {
5651 if let Some(s) = csharp_first_string_in_subtree(arg, source) {
5652 return Some(s);
5653 }
5654 }
5655 }
5656 }
5657 None
5658}
5659
5660fn csharp_attribute_simple_name_from_name_node(n: Node, source: &str) -> Option<String> {
5662 match n.kind() {
5663 "identifier" => csharp_node_text(n, source),
5664 "generic_name" => {
5665 let id = n.child(0).filter(|c| c.kind() == "identifier")?;
5666 csharp_node_text(id, source)
5667 }
5668 "qualified_name" | "alias_qualified_name" => {
5669 let mut last = None;
5670 for i in 0..n.child_count() {
5671 let Some(c) = n.child(i) else {
5672 continue;
5673 };
5674 if !c.is_named() {
5675 continue;
5676 }
5677 if let Some(s) = csharp_attribute_simple_name_from_name_node(c, source) {
5678 last = Some(s);
5679 }
5680 }
5681 last
5682 }
5683 _ => None,
5684 }
5685}
5686
/// Normalises a raw route template: surrounding whitespace is trimmed and a leading `/` is
/// added when missing. Returns `None` when nothing remains after trimming.
fn csharp_route_path_from_str(path: &str) -> Option<String> {
    let trimmed = path.trim();
    match trimmed.chars().next() {
        None => None,
        Some('/') => Some(trimmed.to_string()),
        Some(_) => Some(["/", trimmed].concat()),
    }
}
5697
5698fn csharp_type_level_route_template(type_node: Node, source: &str) -> Option<String> {
5700 let mut last: Option<String> = None;
5701 for i in 0..type_node.child_count() {
5702 let Some(c) = type_node.child(i) else {
5703 continue;
5704 };
5705 if c.kind() != "attribute_list" {
5706 continue;
5707 }
5708 for j in 0..c.child_count() {
5709 let Some(attr) = c.child(j) else {
5710 continue;
5711 };
5712 if attr.kind() != "attribute" {
5713 continue;
5714 }
5715 let Some(name_n) = attr.child_by_field_name("name") else {
5716 continue;
5717 };
5718 let Some(simple) = csharp_attribute_simple_name_from_name_node(name_n, source) else {
5719 continue;
5720 };
5721 if simple != "Route" {
5722 continue;
5723 }
5724 if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
5725 last = csharp_route_path_from_str(&raw);
5726 }
5727 }
5728 }
5729 last
5730}
5731
5732fn csharp_enclosing_route_type_for_method(method: Node) -> Option<Node> {
5733 let mut p = method.parent();
5734 while let Some(n) = p {
5735 let k = n.kind();
5736 if k == "class_declaration" || k == "record_declaration" {
5737 return Some(n);
5738 }
5739 p = n.parent();
5740 }
5741 None
5742}
5743
/// Appends verb `v` to `verbs` unless an equal entry is already present (order-preserving,
/// case-sensitive de-duplication).
fn csharp_push_http_verb(verbs: &mut Vec<String>, v: &str) {
    let already_present = verbs.iter().any(|existing| existing.as_str() == v);
    if already_present {
        return;
    }
    verbs.push(v.to_string());
}
5749
5750fn csharp_parse_method_api_attributes(
5752 method: Node,
5753 source: &str,
5754) -> (Vec<String>, Option<String>, Option<String>) {
5755 let mut verbs: Vec<String> = Vec::new();
5756 let mut http_template: Option<String> = None;
5757 let mut route_attr: Option<String> = None;
5758
5759 const VERB_ATTRS: &[(&str, &str)] = &[
5760 ("HttpGet", "GET"),
5761 ("HttpPost", "POST"),
5762 ("HttpPut", "PUT"),
5763 ("HttpDelete", "DELETE"),
5764 ("HttpPatch", "PATCH"),
5765 ("HttpHead", "HEAD"),
5766 ];
5767
5768 for i in 0..method.child_count() {
5769 let Some(c) = method.child(i) else {
5770 continue;
5771 };
5772 if c.kind() != "attribute_list" {
5773 continue;
5774 }
5775 for j in 0..c.child_count() {
5776 let Some(attr) = c.child(j) else {
5777 continue;
5778 };
5779 if attr.kind() != "attribute" {
5780 continue;
5781 }
5782 let Some(name_n) = attr.child_by_field_name("name") else {
5783 continue;
5784 };
5785 let Some(simple) = csharp_attribute_simple_name_from_name_node(name_n, source) else {
5786 continue;
5787 };
5788
5789 if simple == "Route" {
5790 if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
5791 route_attr = csharp_route_path_from_str(&raw);
5792 }
5793 continue;
5794 }
5795
5796 if let Some((_, verb)) = VERB_ATTRS.iter().find(|(a, _)| *a == simple.as_str()) {
5797 csharp_push_http_verb(&mut verbs, verb);
5798 if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
5799 http_template = csharp_route_path_from_str(&raw);
5800 }
5801 }
5802 }
5803 }
5804
5805 (verbs, http_template, route_attr)
5806}
5807
/// Joins a type-level route and a method-level template into one path with a leading `/`.
///
/// * `class_route` — optional route of the enclosing type; ignored when blank or only slashes.
/// * `method_template` — optional method template; when it is absent, blank, or only
///   slashes, `method_name` is used as the last segment instead.
/// * `method_name` — fallback segment.
///
/// Surrounding whitespace and leading/trailing slashes are removed from both parts.
fn csharp_join_route_parts(
    class_route: Option<&str>,
    method_template: Option<&str>,
    method_name: &str,
) -> String {
    fn unslash(text: &str) -> &str {
        text.trim().trim_start_matches('/').trim_end_matches('/')
    }

    // Pick the method segment: the template if it still has content after
    // stripping, otherwise the method name.
    let from_template = match method_template.map(str::trim) {
        Some(template) if !template.is_empty() => {
            let inner = unslash(template);
            if inner.is_empty() {
                None
            } else {
                Some(inner.to_owned())
            }
        }
        _ => None,
    };
    let chosen = from_template.unwrap_or_else(|| method_name.trim().to_owned());
    let segment = unslash(&chosen);

    let base = class_route
        .map(str::trim)
        .filter(|route| !route.is_empty())
        .map(unslash)
        .filter(|route| !route.is_empty());

    match base {
        Some(prefix) => format!("/{}/{}", prefix, segment),
        None => format!("/{}", segment),
    }
}
5837
5838fn extract_csharp_api_endpoints_from_tree(tree: &Tree, source: &str) -> Vec<(Vec<String>, String, String)> {
5843 let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
5844 let root = tree.root_node();
5845
5846 walk_tree(root, |node| {
5847 if node.kind() != "method_declaration" {
5848 return;
5849 }
5850
5851 let method_name = node
5852 .child_by_field_name("name")
5853 .and_then(|n| csharp_node_text(n, source))
5854 .or_else(|| identifier_text_from_children(node, source));
5855
5856 let Some(method_name) = method_name else {
5857 return;
5858 };
5859
5860 let (verbs, http_template, route_attr) = csharp_parse_method_api_attributes(node, source);
5861
5862 if verbs.is_empty() && http_template.is_none() && route_attr.is_none() {
5863 return;
5864 }
5865
5866 let methods_http = if verbs.is_empty() {
5867 vec!["ANY".to_string()]
5868 } else {
5869 verbs
5870 };
5871
5872 let method_segment = http_template.or(route_attr);
5873 let class_route = csharp_enclosing_route_type_for_method(node)
5874 .and_then(|t| csharp_type_level_route_template(t, source));
5875
5876 let path_template = csharp_join_route_parts(
5877 class_route.as_deref(),
5878 method_segment.as_deref(),
5879 &method_name,
5880 );
5881
5882 endpoints.push((methods_http, path_template, method_name));
5883 });
5884
5885 endpoints
5886}
5887
5888fn build_internal_import_map(source: &str) -> HashMap<String, String> {
5892 let mut map = HashMap::new();
5893 for fqn in extract_internal_java_imports(source) {
5894 if let Some(simple) = fqn.rsplit('.').next() {
5895 map.insert(simple.to_string(), fqn.clone());
5896 }
5897 }
5898 map
5899}
5900
5901fn collect_local_var_types_for_method(
5907 method_node: Node,
5908 source: &str,
5909 package: Option<&str>,
5910 import_map: &HashMap<String, String>,
5911) -> HashMap<String, String> {
5912 let mut vars: HashMap<String, String> = HashMap::new();
5913
5914 walk_tree(method_node, |node| {
5915 if node.kind() != "local_variable_declaration" {
5916 return;
5917 }
5918
5919 let start = node.start_byte() as usize;
5920 let end = node.end_byte() as usize;
5921 if end > source.len() || start >= end {
5922 return;
5923 }
5924
5925 let stmt = &source[start..end];
5926 let before_eq = stmt.split('=').next().unwrap_or("").trim();
5927 let mut parts = before_eq.split_whitespace();
5929 let type_part = match parts.next() {
5930 Some(t) => t,
5931 None => return,
5932 };
5933 let var_part = match parts.next() {
5934 Some(v) => v,
5935 None => return,
5936 };
5937
5938 let type_simple = type_part
5940 .split('<')
5941 .next()
5942 .unwrap_or(type_part)
5943 .split('.')
5944 .last()
5945 .unwrap_or(type_part)
5946 .trim();
5947
5948 let var_name = var_part
5950 .trim_end_matches(';')
5951 .trim_end_matches(',')
5952 .trim();
5953
5954 if var_name.is_empty() || type_simple.is_empty() {
5955 return;
5956 }
5957
5958 let fqn = if let Some(import_fqn) = import_map.get(type_simple) {
5963 import_fqn.clone()
5964 } else if let Some(pkg_prefix) = import_map
5965 .values()
5966 .find(|v| v.ends_with(".*"))
5967 {
5968 let base = pkg_prefix.trim_end_matches(".*");
5969 format!("{base}.{type_simple}")
5970 } else if let Some(pkg) = package {
5971 format!("{pkg}.{type_simple}")
5972 } else {
5973 type_simple.to_string()
5974 };
5975
5976 vars.insert(var_name.to_string(), fqn);
5977 });
5978
5979 vars
5980}
5981
5982fn collect_identifiers(node: Node, source: &str, out: &mut Vec<String>) {
5986 walk_tree(node, |n| {
5987 if n.kind() == "identifier" {
5988 let start = n.start_byte() as usize;
5989 let end = n.end_byte() as usize;
5990 if end <= source.len() && start < end {
5991 out.push(source[start..end].to_string());
5992 }
5993 }
5994 });
5995}
5996
/// Extracts `(caller_fqn, callee_fqn)` pairs for every `method_invocation` found inside
/// a `method_declaration` of a Java syntax tree.
///
/// * `tree` / `source` — the parsed file and its text.
/// * `package` — the file's package name, used as a qualifier when present.
///
/// The caller name is `<class>.<method>` (class qualified by `package` when given), or
/// `<package>.<method>` / `<method>` when no enclosing class or interface is found.
/// The callee is qualified by the receiver's resolved type when one can be guessed,
/// otherwise by the caller's class, then the package, then left unqualified.
fn extract_java_calls(
    tree: &Tree,
    source: &str,
    package: Option<&str>,
) -> Vec<(String, String)> {
    let mut calls: Vec<(String, String)> = Vec::new();
    let root = tree.root_node();

    // Simple-name -> imported name lookup, shared by all methods in the file.
    let import_map = build_internal_import_map(source);

    walk_tree(root, |node| {
        if node.kind() != "method_declaration" {
            return;
        }

        let method_name = match identifier_text_from_children(node, source) {
            Some(name) => name,
            None => return,
        };

        // Find the nearest enclosing class/interface; stop at the first one even if
        // its name cannot be read.
        let mut parent = node.parent();
        let mut class_fqn: Option<String> = None;
        while let Some(p) = parent {
            let pk = p.kind();
            if pk == "class_declaration" || pk == "interface_declaration" {
                if let Some(class_name) = identifier_text_from_children(p, source) {
                    let full = if let Some(pkg) = package {
                        format!("{pkg}.{class_name}")
                    } else {
                        class_name
                    };
                    class_fqn = Some(full);
                }
                break;
            }
            parent = p.parent();
        }

        let caller_fqn = if let Some(ref cls) = class_fqn {
            format!("{cls}.{}", method_name)
        } else if let Some(pkg) = package {
            format!("{pkg}.{}", method_name)
        } else {
            method_name.clone()
        };

        // Variable name -> type name for locals declared in this method.
        let local_var_types =
            collect_local_var_types_for_method(node, source, package, &import_map);

        walk_tree(node, |child| {
            if child.kind() != "method_invocation" {
                return;
            }

            // Prefer the `name` field; fall back to the child-identifier helper when the
            // field is missing or its byte range is unusable.
            let callee_name = if let Some(name_node) = child.child_by_field_name("name") {
                let start = name_node.start_byte() as usize;
                let end = name_node.end_byte() as usize;
                if end <= source.len() && start < end {
                    source[start..end].to_string()
                } else {
                    match identifier_text_from_children(child, source) {
                        Some(name) => name,
                        None => return,
                    }
                }
            } else {
                match identifier_text_from_children(child, source) {
                    Some(name) => name,
                    None => return,
                }
            };

            let mut receiver_type_fqn: Option<String> = None;
            if let Some(object_node) = child.child_by_field_name("object") {
                let mut recv_idents: Vec<String> = Vec::new();
                collect_identifiers(object_node, source, &mut recv_idents);

                // First receiver identifier that is a known local variable decides the type.
                for ident in &recv_idents {
                    if let Some(ty) = local_var_types.get(ident) {
                        receiver_type_fqn = Some(ty.clone());
                        break;
                    }
                }

                // Otherwise treat the first identifier as a type name and resolve it via
                // an exact import, then any wildcard import, then the file's package.
                if receiver_type_fqn.is_none() {
                    if let Some(first_ident) = recv_idents.first() {
                        if let Some(import_fqn) = import_map.get(first_ident) {
                            receiver_type_fqn = Some(import_fqn.clone());
                        } else if let Some(pkg_prefix) = import_map
                            .values()
                            .find(|v| v.ends_with(".*"))
                        {
                            let base = pkg_prefix.trim_end_matches(".*");
                            receiver_type_fqn =
                                Some(format!("{base}.{first_ident}"));
                        } else if let Some(pkg) = package {
                            receiver_type_fqn =
                                Some(format!("{pkg}.{first_ident}"));
                        }
                    }
                }
            }

            let callee_fqn = if let Some(ref recv_ty) = receiver_type_fqn {
                format!("{recv_ty}.{}", callee_name)
            } else if let Some(ref cls) = class_fqn {
                format!("{cls}.{}", callee_name)
            } else if let Some(pkg) = package {
                format!("{pkg}.{}", callee_name)
            } else {
                callee_name.clone()
            };

            calls.push((caller_fqn.clone(), callee_fqn));
        });
    });

    calls
}
6150
6151fn node_text_slice(n: Node, source: &str) -> Option<String> {
6154 let s = n.start_byte() as usize;
6155 let e = (n.end_byte() as usize).min(source.len());
6156 (s < e).then(|| source[s..e].to_string())
6157}
6158
/// Builds a file-scoped name of the form `<file_path>::<logical_name>`.
fn non_java_file_scoped_fqn(file_path: &str, logical_name: &str) -> String {
    [file_path, logical_name].join("::")
}
6162
6163fn non_java_short_name_and_depth(language: LanguageId, logical: &str) -> (String, usize) {
6165 match language {
6166 LanguageId::Rust => {
6167 let short = logical
6168 .rsplit("::")
6169 .next()
6170 .unwrap_or(logical)
6171 .to_string();
6172 (short, logical.matches("::").count())
6173 }
6174 _ => {
6175 let short = logical
6176 .rsplit_once('.')
6177 .map(|(_, s)| s)
6178 .unwrap_or(logical)
6179 .to_string();
6180 (short, logical.matches('.').count())
6181 }
6182 }
6183}
6184
6185fn rust_inside_impl(node: Node) -> bool {
6186 let mut cur = node.parent();
6187 while let Some(p) = cur {
6188 if p.kind() == "impl_item" {
6189 return true;
6190 }
6191 cur = p.parent();
6192 }
6193 false
6194}
6195
6196fn rust_enclosing_mod_prefixes(fn_node: Node, source: &str) -> Vec<String> {
6197 let mut prefixes = Vec::new();
6198 let mut cur = fn_node.parent();
6199 while let Some(p) = cur {
6200 if p.kind() == "mod_item" {
6201 if let Some(name_n) = p.child_by_field_name("name") {
6202 if let Some(name) = node_text_slice(name_n, source) {
6203 let name = name.trim();
6204 if !name.is_empty() {
6205 prefixes.insert(0, name.to_string());
6206 }
6207 }
6208 }
6209 }
6210 cur = p.parent();
6211 }
6212 prefixes
6213}
6214
6215fn rust_function_logical_name(fn_node: Node, source: &str) -> Option<String> {
6216 let name = rust_function_name(fn_node, source)?;
6217 let prefixes = rust_enclosing_mod_prefixes(fn_node, source);
6218 Some(if prefixes.is_empty() {
6219 name
6220 } else {
6221 format!("{}::{}", prefixes.join("::"), name)
6222 })
6223}
6224
6225fn rust_innermost_enclosing_function(call: Node) -> Option<Node> {
6226 let mut cur = call.parent();
6227 while let Some(p) = cur {
6228 if p.kind() == "function_item" && !rust_inside_impl(p) {
6229 return Some(p);
6230 }
6231 cur = p.parent();
6232 }
6233 None
6234}
6235
6236fn extract_rust_graph_symbols(tree: &Tree, source: &str, file_path: &str) -> Vec<FunctionSymbol> {
6237 let mut out = Vec::new();
6238 let mut seen = HashSet::new();
6239 walk_tree(tree.root_node(), |node| {
6240 if node.kind() != "function_item" || rust_inside_impl(node) {
6241 return;
6242 }
6243 let Some(logical) = rust_function_logical_name(node, source) else {
6244 return;
6245 };
6246 let fqn = non_java_file_scoped_fqn(file_path, &logical);
6247 if !seen.insert(fqn.clone()) {
6248 return;
6249 }
6250 let name = rust_function_name(node, source).unwrap_or_default();
6251 out.push(FunctionSymbol {
6252 name,
6253 fqn,
6254 class_fqn: None,
6255 return_type: None,
6256 param_types: Vec::new(),
6257 param_count: 0,
6258 modifiers: Vec::new(),
6259 is_pointer_receiver: None,
6260 });
6261 });
6262 out
6263}
6264
6265fn extract_rust_intrafile_calls(
6266 tree: &Tree,
6267 source: &str,
6268 file_path: &str,
6269 name_to_fqn: &HashMap<String, String>,
6270) -> Vec<(String, String)> {
6271 let mut calls = Vec::new();
6272 walk_tree(tree.root_node(), |inner| {
6273 if inner.kind() != "call_expression" {
6274 return;
6275 }
6276 let Some(encl) = rust_innermost_enclosing_function(inner) else {
6277 return;
6278 };
6279 let Some(logical) = rust_function_logical_name(encl, source) else {
6280 return;
6281 };
6282 let caller_fqn = non_java_file_scoped_fqn(file_path, &logical);
6283 let Some(func_n) = inner.child_by_field_name("function") else {
6284 return;
6285 };
6286 if func_n.kind() != "identifier" {
6287 return;
6288 }
6289 let Some(callee_name) = node_text_slice(func_n, source) else {
6290 return;
6291 };
6292 let callee_name = callee_name.trim();
6293 if let Some(callee_fqn) = name_to_fqn.get(callee_name) {
6294 calls.push((caller_fqn, callee_fqn.clone()));
6295 }
6296 });
6297 calls
6298}
6299
6300fn rust_use_path_segments_from_node(node: Node, source: &str) -> Option<Vec<String>> {
6301 let text = node_text_slice(node, source)?;
6302 let text = text.split('{').next()?.trim();
6303 let text = text.trim_end_matches(';').trim();
6304 if text.is_empty() {
6305 return None;
6306 }
6307 let segs: Vec<String> = text
6308 .split("::")
6309 .map(|s| s.trim().to_string())
6310 .filter(|s| !s.is_empty() && s != "*" && s != "as")
6311 .collect();
6312 if segs.is_empty() {
6313 None
6314 } else {
6315 Some(segs)
6316 }
6317}
6318
6319fn extract_rust_use_paths(tree: &Tree, source: &str) -> Vec<Vec<String>> {
6320 let mut out = Vec::new();
6321 walk_tree(tree.root_node(), |node| {
6322 if node.kind() != "use_declaration" {
6323 return;
6324 }
6325 walk_tree(node, |inner| {
6326 if inner.kind() != "scoped_identifier" && inner.kind() != "use_tree" {
6327 return;
6328 }
6329 if let Some(segs) = rust_use_path_segments_from_node(inner, source) {
6330 if !segs.is_empty() && !out.contains(&segs) {
6331 out.push(segs);
6332 }
6333 }
6334 });
6335 });
6336 out
6337}
6338
/// Converts a Rust file path into module path segments.
///
/// Backslashes are treated as `/` and a leading `src/` is removed. `lib.rs` and `main.rs`
/// map to the empty (crate root) path; `a/b/mod.rs` and `a/b.rs` both map to `["a", "b"]`.
/// Paths that do not end in `.rs` yield an empty vector.
fn rust_file_to_module_path(file_path: &str) -> Vec<String> {
    let unix = file_path.replace('\\', "/");
    let relative = unix.strip_prefix("src/").unwrap_or(&unix);
    if matches!(relative, "lib.rs" | "main.rs") {
        return Vec::new();
    }

    let module = relative
        .strip_suffix("/mod.rs")
        .or_else(|| relative.strip_suffix(".rs"));

    match module {
        Some(stem) if !stem.is_empty() => stem.split('/').map(str::to_owned).collect(),
        _ => Vec::new(),
    }
}
6359
/// Lists the file paths that could define the module named by `segments`.
///
/// The crate root (empty `segments`) maps to `src/lib.rs` and `src/main.rs`; any other
/// module `a::b` maps to `src/a/b.rs` and `src/a/b/mod.rs`, in that order.
fn rust_module_path_to_file_candidates(segments: &[String]) -> Vec<String> {
    if segments.is_empty() {
        return ["src/lib.rs", "src/main.rs"]
            .iter()
            .map(|root| root.to_string())
            .collect();
    }

    let stem = segments.join("/");
    [".rs", "/mod.rs"]
        .iter()
        .map(|suffix| format!("src/{stem}{suffix}"))
        .collect()
}
6370
6371fn resolve_rust_use_to_known_file(
6372 use_segments: &[String],
6373 current_file: &str,
6374 known_paths: &HashSet<String>,
6375) -> Option<String> {
6376 if use_segments.is_empty() {
6377 return None;
6378 }
6379 let mut mod_path = rust_file_to_module_path(current_file);
6380 let mut i = 0usize;
6381 match use_segments[0].as_str() {
6382 "crate" => {
6383 i = 1;
6384 mod_path.clear();
6385 }
6386 "super" => {
6387 i = 1;
6388 if !mod_path.is_empty() {
6389 mod_path.pop();
6390 }
6391 }
6392 "self" => {
6393 i = 1;
6394 }
6395 _ => {}
6396 }
6397 let rest: Vec<String> = use_segments[i..].to_vec();
6398 for len in (0..=rest.len()).rev() {
6399 let mut target = mod_path.clone();
6400 target.extend(rest[..len].iter().cloned());
6401 for cand in rust_module_path_to_file_candidates(&target) {
6402 if known_paths.contains(&cand) {
6403 return Some(cand);
6404 }
6405 }
6406 }
6407 None
6408}
6409
6410fn extract_non_java_function_symbols(
6411 file: &ParsedFile,
6412 source: &str,
6413 file_path: &str,
6414) -> Vec<FunctionSymbol> {
6415 match file.language {
6416 LanguageId::Rust => extract_rust_graph_symbols(&file.tree, source, file_path),
6417 LanguageId::Python => extract_python_graph_symbols(&file.tree, source, file_path),
6418 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
6419 extract_js_ts_graph_symbols(&file.tree, source, file_path, file.language)
6420 }
6421 _ => Vec::new(),
6422 }
6423}
6424
6425fn extract_python_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
6426 let mut out = Vec::new();
6427 walk_tree(tree.root_node(), |node| {
6428 if node.kind() == "ERROR" || node.is_missing() {
6429 let pos = node.start_position();
6430 let s = node.start_byte() as usize;
6431 let e = (node.end_byte() as usize).min(source.len());
6432 let snippet = if s < e {
6433 source[s..e]
6434 .chars()
6435 .take(120)
6436 .collect::<String>()
6437 .replace('\n', " ")
6438 } else {
6439 String::new()
6440 };
6441 out.push((pos.row + 1, pos.column + 1, snippet));
6442 }
6443 });
6444 out
6445}
6446
/// Reports parse problems in a JavaScript/TypeScript tree as `(line, column, snippet)`.
///
/// Delegates to `extract_python_parse_warnings`, which only looks at `ERROR` and missing
/// nodes and so does not depend on the grammar.
fn extract_js_ts_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
    extract_python_parse_warnings(tree, source)
}
6450
6451fn python_node_inside_class(mut node: Node) -> bool {
6452 while let Some(p) = node.parent() {
6453 if p.kind() == "class_definition" {
6454 return true;
6455 }
6456 node = p;
6457 }
6458 false
6459}
6460
6461fn python_function_definition_name(fn_node: Node, source: &str) -> Option<String> {
6462 fn_node
6463 .child_by_field_name("name")
6464 .and_then(|n| node_text_slice(n, source))
6465 .map(|s| s.trim().to_string())
6466 .filter(|s| !s.is_empty())
6467}
6468
6469fn python_enclosing_function_prefixes(fn_node: Node, source: &str) -> Vec<String> {
6470 let mut prefixes = Vec::new();
6471 let mut cur = fn_node.parent();
6472 while let Some(p) = cur {
6473 if p.kind() == "function_definition" && !python_node_inside_class(p) {
6474 if let Some(n) = python_function_definition_name(p, source) {
6475 prefixes.insert(0, n);
6476 }
6477 }
6478 cur = p.parent();
6479 }
6480 prefixes
6481}
6482
6483fn python_function_logical_name(fn_node: Node, source: &str) -> Option<String> {
6484 let name = python_function_definition_name(fn_node, source)?;
6485 let prefixes = python_enclosing_function_prefixes(fn_node, source);
6486 Some(if prefixes.is_empty() {
6487 name
6488 } else {
6489 format!("{}.{}", prefixes.join("."), name)
6490 })
6491}
6492
6493fn extract_python_graph_symbols(tree: &Tree, source: &str, file_path: &str) -> Vec<FunctionSymbol> {
6494 let mut out = Vec::new();
6495 let mut seen = HashSet::new();
6496 walk_tree(tree.root_node(), |node| {
6497 if node.kind() != "function_definition" {
6498 return;
6499 }
6500 if python_node_inside_class(node) {
6501 return;
6502 }
6503 let Some(logical) = python_function_logical_name(node, source) else {
6504 return;
6505 };
6506 let fqn = non_java_file_scoped_fqn(file_path, &logical);
6507 if !seen.insert(fqn.clone()) {
6508 return;
6509 }
6510 let name = python_function_definition_name(node, source).unwrap_or_default();
6511 out.push(FunctionSymbol {
6512 name,
6513 fqn,
6514 class_fqn: None,
6515 return_type: None,
6516 param_types: Vec::new(),
6517 param_count: 0,
6518 modifiers: Vec::new(),
6519 is_pointer_receiver: None,
6520 });
6521 });
6522 out
6523}
6524
6525fn python_innermost_enclosing_function(call: Node) -> Option<Node> {
6526 let mut cur = call.parent();
6527 while let Some(p) = cur {
6528 if p.kind() == "function_definition" && !python_node_inside_class(p) {
6529 return Some(p);
6530 }
6531 cur = p.parent();
6532 }
6533 None
6534}
6535
6536fn extract_python_intrafile_calls(
6537 tree: &Tree,
6538 source: &str,
6539 file_path: &str,
6540 name_to_fqn: &HashMap<String, String>,
6541) -> Vec<(String, String)> {
6542 let mut calls = Vec::new();
6543 walk_tree(tree.root_node(), |inner| {
6544 if inner.kind() != "call" {
6545 return;
6546 }
6547 let Some(encl) = python_innermost_enclosing_function(inner) else {
6548 return;
6549 };
6550 let Some(logical) = python_function_logical_name(encl, source) else {
6551 return;
6552 };
6553 let caller_fqn = non_java_file_scoped_fqn(file_path, &logical);
6554 let Some(func_n) = inner.child_by_field_name("function") else {
6555 return;
6556 };
6557 if func_n.kind() != "identifier" {
6558 return;
6559 }
6560 let Some(callee_name) = node_text_slice(func_n, source) else {
6561 return;
6562 };
6563 let callee_name = callee_name.trim();
6564 if let Some(callee_fqn) = name_to_fqn.get(callee_name) {
6565 calls.push((caller_fqn, callee_fqn.clone()));
6566 }
6567 });
6568 calls
6569}
6570
6571fn extract_python_import_modules(tree: &Tree, source: &str) -> Vec<String> {
6572 let mut out = Vec::new();
6573 walk_tree(tree.root_node(), |node| match node.kind() {
6574 "import_from_statement" => {
6575 if let Some(mod_n) = node.child_by_field_name("module_name") {
6576 let t = node_text_slice(mod_n, source).unwrap_or_default();
6577 let t = t.trim();
6578 if !t.is_empty() && t != "." && !t.starts_with('.') {
6579 out.push(t.to_string());
6580 }
6581 }
6582 }
6583 "import_statement" => {
6584 let mut c = node.walk();
6585 if !c.goto_first_child() {
6586 return;
6587 }
6588 loop {
6589 let ch = c.node();
6590 match ch.kind() {
6591 "dotted_name" => {
6592 if let Some(t) = node_text_slice(ch, source) {
6593 let t = t.trim();
6594 if !t.is_empty() {
6595 out.push(t.to_string());
6596 }
6597 }
6598 }
6599 "aliased_import" => {
6600 if let Some(name_n) = ch.child_by_field_name("name") {
6601 if name_n.kind() == "dotted_name" {
6602 if let Some(t) = node_text_slice(name_n, source) {
6603 let t = t.trim();
6604 if !t.is_empty() {
6605 out.push(t.to_string());
6606 }
6607 }
6608 }
6609 }
6610 }
6611 _ => {}
6612 }
6613 if !c.goto_next_sibling() {
6614 break;
6615 }
6616 }
6617 }
6618 _ => {}
6619 });
6620 out.sort();
6621 out.dedup();
6622 out
6623}
6624
/// Resolves a dotted Python module path (e.g. `pkg.sub.mod`) to a known `.py` file.
///
/// The module path is converted to `pkg/sub/mod` and matched against each known path on
/// path-segment boundaries: the match must start at the beginning of the path or right
/// after a `/`, and must be followed by `/` (a package directory) or be the file stem
/// (followed by exactly `.py`). This keeps `os` from matching `cosmos.py`.
///
/// Among matches the shortest path wins; ties are broken lexicographically so the result
/// does not depend on `HashSet` iteration order.
///
/// Returns `None` for a blank module path or when nothing matches.
fn resolve_python_import_to_known_file(
    module_path: &str,
    known_paths: &HashSet<String>,
) -> Option<String> {
    /// True when `needle` occurs in `path` as a whole run of path segments.
    fn matches_on_segment_boundary(path: &str, needle: &str) -> bool {
        let segment_starts =
            std::iter::once(0).chain(path.match_indices('/').map(|(at, _)| at + 1));
        segment_starts
            .filter_map(|start| path[start..].strip_prefix(needle))
            .any(|rest| rest.starts_with('/') || rest == ".py")
    }

    let norm = module_path.trim();
    if norm.is_empty() {
        return None;
    }
    let needle = norm.replace('.', "/");

    known_paths
        .iter()
        .filter(|p| {
            let pn = p.replace('\\', "/");
            pn.ends_with(".py") && matches_on_segment_boundary(&pn, &needle)
        })
        .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
        .cloned()
}
6643
6644fn js_inside_class(mut node: Node) -> bool {
6645 while let Some(p) = node.parent() {
6646 if p.kind() == "class_declaration" {
6647 return true;
6648 }
6649 node = p;
6650 }
6651 false
6652}
6653
6654fn js_function_declaration_name(node: Node, source: &str) -> Option<String> {
6655 node.child_by_field_name("name")
6656 .and_then(|n| node_text_slice(n, source))
6657 .map(|s| s.trim().to_string())
6658 .filter(|s| !s.is_empty())
6659}
6660
6661fn js_nested_function_declaration_prefix(fn_node: Node, source: &str, self_name: &str) -> String {
6662 let mut prefixes = Vec::new();
6663 let mut cur = fn_node.parent();
6664 while let Some(p) = cur {
6665 if p.kind() == "function_declaration" && !js_inside_class(p) {
6666 if let Some(n) = js_function_declaration_name(p, source) {
6667 prefixes.insert(0, n);
6668 }
6669 }
6670 cur = p.parent();
6671 }
6672 if prefixes.is_empty() {
6673 self_name.to_string()
6674 } else {
6675 format!("{}.{}", prefixes.join("."), self_name)
6676 }
6677}
6678
6679fn js_prefix_from_ancestors_for_expr(expr_node: Node, source: &str, var_name: &str) -> String {
6680 let mut prefixes = Vec::new();
6681 let mut cur = expr_node.parent();
6682 while let Some(p) = cur {
6683 if p.kind() == "function_declaration" && !js_inside_class(p) {
6684 if let Some(n) = js_function_declaration_name(p, source) {
6685 prefixes.insert(0, n);
6686 }
6687 }
6688 cur = p.parent();
6689 }
6690 if prefixes.is_empty() {
6691 var_name.to_string()
6692 } else {
6693 format!("{}.{}", prefixes.join("."), var_name)
6694 }
6695}
6696
6697fn js_class_declaration_name_from(class_node: Node, source: &str) -> Option<String> {
6698 class_node
6699 .child_by_field_name("name")
6700 .and_then(|n| node_text_slice(n, source))
6701 .map(|s| s.trim().to_string())
6702 .filter(|s| !s.is_empty())
6703}
6704
6705fn js_enclosing_class_declaration_name(start: Node, source: &str) -> Option<String> {
6706 let mut cur = start.parent();
6707 while let Some(p) = cur {
6708 if p.kind() == "class_declaration" {
6709 return js_class_declaration_name_from(p, source);
6710 }
6711 cur = p.parent();
6712 }
6713 None
6714}
6715
6716fn js_property_name_text(method_node: Node, source: &str) -> Option<String> {
6717 let name_node = method_node
6718 .child_by_field_name("name")
6719 .or_else(|| method_node.child_by_field_name("property"))?;
6720 match name_node.kind() {
6721 "property_identifier" | "identifier" | "private_property_identifier" => {
6722 node_text_slice(name_node, source)
6723 }
6724 _ => None,
6725 }
6726 .map(|s| s.trim().to_string())
6727 .filter(|s| !s.is_empty())
6728}
6729
/// Reports whether `kind` is one of the node kinds treated as a class member:
/// a method definition or any of the field-definition variants.
fn is_js_ts_class_member_fn(kind: &str) -> bool {
    const MEMBER_KINDS: [&str; 5] = [
        "method_definition",
        "field_definition",
        "public_field_definition",
        "private_field_definition",
        "protected_field_definition",
    ];
    MEMBER_KINDS.iter().any(|member| *member == kind)
}
6740
/// Strips one pair of matching single or double quotes from a trimmed string literal.
///
/// Text that is not wrapped in a matching pair is returned trimmed but otherwise unchanged.
fn js_ts_string_inner(spec: &str) -> String {
    let literal = spec.trim();
    for quote in ['"', '\''] {
        let unquoted = literal
            .strip_prefix(quote)
            .and_then(|tail| tail.strip_suffix(quote));
        if let Some(inner) = unquoted {
            return inner.to_owned();
        }
    }
    literal.to_owned()
}
6752
6753fn extract_js_ts_import_specifiers(tree: &Tree, source: &str) -> Vec<String> {
6754 let mut out = Vec::new();
6755 walk_tree(tree.root_node(), |node| {
6756 if node.kind() != "import_statement" && node.kind() != "export_statement" {
6757 return;
6758 }
6759 if let Some(src) = node.child_by_field_name("source") {
6760 if src.kind() == "string" {
6761 if let Some(raw) = node_text_slice(src, source) {
6762 let inner = js_ts_string_inner(&raw);
6763 if !inner.is_empty() {
6764 out.push(inner);
6765 }
6766 }
6767 }
6768 }
6769 });
6770 out.sort();
6771 out.dedup();
6772 out
6773}
6774
/// Normalizes a path lexically into a `/`-separated string, without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the previous component (or nothing if
/// none is left). Platform prefixes are ignored; a root component yields a leading `/`.
fn normalized_logical_path(path: &Path) -> String {
    use std::path::Component;

    let mut rooted = false;
    let mut stack: Vec<String> = Vec::new();

    for component in path.components() {
        if let Component::Normal(segment) = component {
            stack.push(segment.to_string_lossy().into_owned());
        } else if matches!(component, Component::ParentDir) {
            stack.pop();
        } else if matches!(component, Component::RootDir) {
            rooted = true;
        }
    }

    let joined = stack.join("/");
    if rooted {
        format!("/{joined}")
    } else {
        joined
    }
}
6798
6799fn resolve_js_ts_import_to_known_file(
6800 spec: &str,
6801 current_file: &str,
6802 known_paths: &HashSet<String>,
6803) -> Option<String> {
6804 let spec = spec.trim();
6805 if spec.is_empty() {
6806 return None;
6807 }
6808 if spec.starts_with('@') {
6809 return None;
6810 }
6811
6812 let try_extensions = |base: &str| -> Option<String> {
6813 let base = base.replace('\\', "/");
6814 for ext in ["", ".ts", ".tsx", ".js", ".jsx"] {
6815 let cand = format!("{base}{ext}");
6816 if known_paths.contains(&cand) {
6817 return Some(cand);
6818 }
6819 }
6820 for ext in [".ts", ".tsx", ".js", ".jsx"] {
6821 let cand = format!("{base}/index{ext}");
6822 if known_paths.contains(&cand) {
6823 return Some(cand);
6824 }
6825 }
6826 None
6827 };
6828
6829 if spec.starts_with('.') {
6830 let base = Path::new(current_file).parent()?;
6831 let joined = base.join(spec);
6832 let normalized = normalized_logical_path(&joined);
6833 return try_extensions(&normalized);
6834 }
6835
6836 let needle = spec.replace('\\', "/");
6837 known_paths
6838 .iter()
6839 .filter(|p| {
6840 let pn = p.replace('\\', "/");
6841 (pn.ends_with(".ts")
6842 || pn.ends_with(".tsx")
6843 || pn.ends_with(".js")
6844 || pn.ends_with(".jsx"))
6845 && pn.contains(needle.as_str())
6846 })
6847 .min_by_key(|p| p.len())
6848 .cloned()
6849}
6850
6851fn extract_js_ts_graph_symbols(
6852 tree: &Tree,
6853 source: &str,
6854 file_path: &str,
6855 language: LanguageId,
6856) -> Vec<FunctionSymbol> {
6857 let _ = language;
6858 let mut out = Vec::new();
6859 let mut seen = HashSet::new();
6860 let root = tree.root_node();
6861
6862 let mut push = |logical: String, name: String| {
6863 let fqn = non_java_file_scoped_fqn(file_path, &logical);
6864 if seen.insert(fqn.clone()) {
6865 out.push(FunctionSymbol {
6866 name,
6867 fqn,
6868 class_fqn: None,
6869 return_type: None,
6870 param_types: Vec::new(),
6871 param_count: 0,
6872 modifiers: Vec::new(),
6873 is_pointer_receiver: None,
6874 });
6875 }
6876 };
6877
6878 walk_tree(root, |node| {
6879 let kind = node.kind();
6880 if kind == "function_declaration" {
6881 if js_inside_class(node) {
6882 return;
6883 }
6884 let Some(nm) = js_function_declaration_name(node, source) else {
6885 return;
6886 };
6887 let logical = js_nested_function_declaration_prefix(node, source, &nm);
6888 push(logical, nm);
6889 return;
6890 }
6891
6892 if is_js_ts_class_member_fn(kind) {
6893 let Some(meth) = js_property_name_text(node, source) else {
6894 return;
6895 };
6896 let cls = js_enclosing_class_declaration_name(node, source)
6897 .unwrap_or_else(|| "anonymous_class".to_string());
6898 let logical = format!("{cls}.{meth}");
6899 push(logical, meth);
6900 return;
6901 }
6902
6903 if kind == "variable_declarator" {
6904 let Some(val) = node.child_by_field_name("value") else {
6905 return;
6906 };
6907 if !matches!(val.kind(), "arrow_function" | "function_expression") {
6908 return;
6909 }
6910 let Some(name_n) = node.child_by_field_name("name") else {
6911 return;
6912 };
6913 if name_n.kind() != "identifier" {
6914 return;
6915 }
6916 let Some(var_name) = node_text_slice(name_n, source) else {
6917 return;
6918 };
6919 let var_name = var_name.trim().to_string();
6920 if var_name.is_empty() {
6921 return;
6922 }
6923 if js_inside_class(node) {
6924 let cls = js_enclosing_class_declaration_name(node, source)
6925 .unwrap_or_else(|| "anonymous_class".to_string());
6926 let logical = format!("{cls}.{var_name}");
6927 push(logical, var_name);
6928 } else {
6929 let logical = js_prefix_from_ancestors_for_expr(val, source, &var_name);
6930 push(logical, var_name);
6931 }
6932 }
6933 });
6934
6935 out
6936}
6937
/// Returns the logical name of the function-like construct that encloses `call`.
///
/// Ancestors are examined from the innermost outward; the first one that yields a name
/// decides the result:
/// * a `function_declaration` outside a class → its nested-function path;
/// * a class member node → `Class.member` (`anonymous_class` when the class is unnamed);
/// * an arrow function / function expression → the name of the nearest
///   `variable_declarator` at or above it, prefixed by enclosing function declarations.
///
/// Returns `None` when no ancestor qualifies, and also as soon as a qualifying ancestor is
/// found whose name cannot be read (the `?` operators below return early).
fn js_ts_innermost_enclosing_logical(call: Node, source: &str) -> Option<String> {
    let mut cur = call.parent();
    while let Some(p) = cur {
        match p.kind() {
            "function_declaration" => {
                // Function declarations inside a class are skipped; keep climbing.
                if js_inside_class(p) {
                    cur = p.parent();
                    continue;
                }
                let nm = js_function_declaration_name(p, source)?;
                return Some(js_nested_function_declaration_prefix(p, source, &nm));
            }
            k if is_js_ts_class_member_fn(k) => {
                let meth = js_property_name_text(p, source)?;
                let cls = js_enclosing_class_declaration_name(p, source)
                    .unwrap_or_else(|| "anonymous_class".to_string());
                return Some(format!("{cls}.{meth}"));
            }
            "arrow_function" | "function_expression" => {
                // Search upward (starting at the function node itself) for the declarator
                // this function value is assigned to. The search is not limited to the
                // direct parent, so any enclosing declarator is accepted.
                let mut up = Some(p);
                while let Some(x) = up {
                    if x.kind() == "variable_declarator" {
                        let name_n = x.child_by_field_name("name")?;
                        if name_n.kind() != "identifier" {
                            return None;
                        }
                        let vn = node_text_slice(name_n, source)?;
                        let vn = vn.trim();
                        if vn.is_empty() {
                            return None;
                        }
                        return Some(js_prefix_from_ancestors_for_expr(p, source, vn));
                    }
                    up = x.parent();
                }
                // No declarator above: fall through and keep climbing from `p`.
            }
            _ => {}
        }
        cur = p.parent();
    }
    None
}
6980
6981fn extract_js_ts_intrafile_calls(
6982 tree: &Tree,
6983 source: &str,
6984 file_path: &str,
6985 language: LanguageId,
6986 name_to_fqn: &HashMap<String, String>,
6987) -> Vec<(String, String)> {
6988 let _ = language;
6989 let mut calls = Vec::new();
6990 walk_tree(tree.root_node(), |inner| {
6991 if inner.kind() != "call_expression" {
6992 return;
6993 }
6994 let Some(logical) = js_ts_innermost_enclosing_logical(inner, source) else {
6995 return;
6996 };
6997 let caller_fqn = non_java_file_scoped_fqn(file_path, &logical);
6998 let Some(func_n) = inner.child_by_field_name("function") else {
6999 return;
7000 };
7001 if func_n.kind() != "identifier" {
7002 return;
7003 }
7004 let Some(callee_name) = node_text_slice(func_n, source) else {
7005 return;
7006 };
7007 let callee_name = callee_name.trim();
7008 if let Some(callee_fqn) = name_to_fqn.get(callee_name) {
7009 calls.push((caller_fqn, callee_fqn.clone()));
7010 }
7011 });
7012 calls
7013}
7014
7015fn extract_java_spring_endpoints(source: &str) -> Vec<(Vec<String>, String, String)> {
7022 let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
7023 let mut class_base_path: Option<String> = None;
7024 let mut pending_methods: Vec<String> = Vec::new();
7025 let mut pending_path: Option<String> = None;
7026
7027 for line in source.lines() {
7028 let trimmed = line.trim();
7029
7030 if trimmed.starts_with("@RequestMapping") && !trimmed.contains("method") {
7032 if let Some(path) = extract_java_annotation_path(trimmed) {
7033 class_base_path = Some(path);
7034 }
7035 continue;
7036 }
7037
7038 if trimmed.starts_with("@RestController") || trimmed.starts_with("@Controller") {
7040 continue;
7041 }
7042
7043 if trimmed.starts_with('@') {
7045 for (attr, verb) in [
7046 ("@GetMapping", "GET"),
7047 ("@PostMapping", "POST"),
7048 ("@PutMapping", "PUT"),
7049 ("@DeleteMapping", "DELETE"),
7050 ("@PatchMapping", "PATCH"),
7051 ] {
7052 if trimmed.starts_with(attr) {
7053 pending_methods.push(verb.to_string());
7054 if let Some(path) = extract_java_annotation_path(trimmed) {
7055 pending_path = Some(path);
7056 } else {
7057 pending_path = Some("/".to_string());
7058 }
7059 }
7060 }
7061
7062 if trimmed.starts_with("@RequestMapping") {
7064 if let Some(path) = extract_java_annotation_path(trimmed) {
7065 pending_path = Some(path);
7066 }
7067 if trimmed.contains("RequestMethod.GET") {
7069 pending_methods.push("GET".to_string());
7070 } else if trimmed.contains("RequestMethod.POST") {
7071 pending_methods.push("POST".to_string());
7072 } else if trimmed.contains("RequestMethod.PUT") {
7073 pending_methods.push("PUT".to_string());
7074 } else if trimmed.contains("RequestMethod.DELETE") {
7075 pending_methods.push("DELETE".to_string());
7076 } else if pending_path.is_some() && pending_methods.is_empty() {
7077 pending_methods.push("ANY".to_string());
7078 }
7079 }
7080 continue;
7081 }
7082
7083 if (trimmed.starts_with("public ")
7085 || trimmed.starts_with("private ")
7086 || trimmed.starts_with("protected "))
7087 && trimmed.contains('(')
7088 && !pending_methods.is_empty()
7089 {
7090 let before_paren = match trimmed.split_once('(') {
7091 Some((before, _)) => before,
7092 None => continue,
7093 };
7094 let mut last = None;
7095 for p in before_paren.split_whitespace() {
7096 last = Some(p);
7097 }
7098 let method_name = match last {
7099 Some(name) => name.trim().to_string(),
7100 None => continue,
7101 };
7102
7103 let path_template = if let Some(ref base) = class_base_path {
7104 let method_path = pending_path.clone().unwrap_or_else(|| "/".to_string());
7105 if method_path.starts_with('/') {
7106 format!("{}{}", base.trim_end_matches('/'), method_path)
7107 } else {
7108 format!("{}/{}", base.trim_end_matches('/'), method_path)
7109 }
7110 } else {
7111 pending_path.clone().unwrap_or_else(|| format!("/{}", method_name))
7112 };
7113
7114 endpoints.push((pending_methods.clone(), path_template, method_name));
7115
7116 pending_methods.clear();
7117 pending_path = None;
7118 }
7119 }
7120
7121 endpoints
7122}
7123
/// Returns the contents of the first double-quoted string on an annotation line.
///
/// Yields `None` when the line has no complete `"..."` pair or when the quoted text is
/// empty.
fn extract_java_annotation_path(attr_line: &str) -> Option<String> {
    let (_, after_open) = attr_line.split_once('"')?;
    let (literal, _) = after_open.split_once('"')?;
    if literal.is_empty() {
        None
    } else {
        Some(literal.to_owned())
    }
}
7138
7139fn extract_java_used_classes(
7148 tree: &Tree,
7149 source: &str,
7150 package: Option<&str>,
7151) -> Vec<(String, String)> {
7152 let mut uses: Vec<(String, String)> = Vec::new();
7153 let root = tree.root_node();
7154 let import_map = build_internal_import_map(source);
7155
7156 walk_tree(root, |node| {
7157 if node.kind() != "method_declaration" {
7158 return;
7159 }
7160
7161 let method_name = match identifier_text_from_children(node, source) {
7162 Some(name) => name,
7163 None => return,
7164 };
7165
7166 let mut parent = node.parent();
7168 let mut class_fqn: Option<String> = None;
7169 while let Some(p) = parent {
7170 let pk = p.kind();
7171 if pk == "class_declaration" || pk == "interface_declaration" {
7172 if let Some(class_name) = identifier_text_from_children(p, source) {
7173 let full = if let Some(pkg) = package {
7174 format!("{pkg}.{class_name}")
7175 } else {
7176 class_name
7177 };
7178 class_fqn = Some(full);
7179 }
7180 break;
7181 }
7182 parent = p.parent();
7183 }
7184
7185 let func_fqn = if let Some(ref cls) = class_fqn {
7186 format!("{cls}.{}", method_name)
7187 } else if let Some(pkg) = package {
7188 format!("{pkg}.{}", method_name)
7189 } else {
7190 method_name.clone()
7191 };
7192
7193 walk_tree(node, |child| {
7195 let kind = child.kind();
7196
7197 if kind == "type_identifier" || kind == "object_creation_expression" {
7199 let type_name = if kind == "object_creation_expression" {
7200 child.child_by_field_name("type")
7202 .and_then(|t| {
7203 let start = t.start_byte() as usize;
7204 let end = t.end_byte() as usize;
7205 if end <= source.len() && start < end {
7206 Some(source[start..end].to_string())
7207 } else {
7208 None
7209 }
7210 })
7211 } else {
7212 let start = child.start_byte() as usize;
7213 let end = child.end_byte() as usize;
7214 if end <= source.len() && start < end {
7215 Some(source[start..end].to_string())
7216 } else {
7217 None
7218 }
7219 };
7220
7221 if let Some(type_name) = type_name {
7222 if is_java_primitive_or_builtin(&type_name) {
7224 return;
7225 }
7226
7227 let used_class_fqn = if let Some(fqn) = import_map.get(&type_name) {
7229 fqn.clone()
7230 } else if let Some(pkg) = package {
7231 format!("{pkg}.{type_name}")
7232 } else {
7233 type_name
7234 };
7235
7236 uses.push((func_fqn.clone(), used_class_fqn));
7237 }
7238 }
7239 });
7240 });
7241
7242 uses
7243}
7244
/// Reports whether `name` is a Java primitive, a common `java.lang` type, or one of the
/// common collection types that should not be recorded as a used class.
fn is_java_primitive_or_builtin(name: &str) -> bool {
    const PRIMITIVES: &[&str] = &[
        "int", "long", "short", "byte", "float", "double", "boolean", "char", "void",
    ];
    const LANG_TYPES: &[&str] = &[
        "String",
        "Integer",
        "Long",
        "Short",
        "Byte",
        "Float",
        "Double",
        "Boolean",
        "Character",
        "Object",
        "Class",
        "Void",
    ];
    const COLLECTION_TYPES: &[&str] = &[
        "List",
        "ArrayList",
        "Map",
        "HashMap",
        "Set",
        "HashSet",
        "Optional",
        "Collection",
        "Iterator",
    ];

    [PRIMITIVES, LANG_TYPES, COLLECTION_TYPES]
        .iter()
        .any(|group| group.contains(&name))
}
7256
/// Call-graph edges gathered from a single Go source file, each as a `(from, to)` pair of
/// names. Built by `collect_go_call_graph_edges`.
struct GoCallGraphEdges {
    /// `(caller, callee)` pairs from `extract_go_calls` followed by those from
    /// `extract_go_goroutine_calls`.
    calls_function: Vec<(String, String)>,
    /// `(function, type)` pairs from `extract_go_used_types`.
    uses_class: Vec<(String, String)>,
    /// `(struct, embedded type)` pairs from `extract_go_embedding`.
    class_uses_class: Vec<(String, String)>,
}
7262
7263fn collect_go_call_graph_edges(
7264 tree: &Tree,
7265 source: &str,
7266 package_name: Option<&str>,
7267) -> GoCallGraphEdges {
7268 let mut calls_function = extract_go_calls(tree, source, package_name);
7269 calls_function.extend(extract_go_goroutine_calls(tree, source, package_name));
7270 GoCallGraphEdges {
7271 calls_function,
7272 uses_class: extract_go_used_types(tree, source, package_name),
7273 class_uses_class: extract_go_embedding(tree, source, package_name),
7274 }
7275}
7276
7277fn apply_go_call_graph_to_ir(accumulator: &mut IrEdgeAccumulator, edges: GoCallGraphEdges) {
7278 for (caller, callee) in edges.calls_function {
7279 accumulator.calls_function.insert((caller, callee));
7280 }
7281 for (fn_fqn, struct_fqn) in edges.uses_class {
7282 accumulator.uses_class.insert((fn_fqn, struct_fqn));
7283 }
7284 for (derived, base) in edges.class_uses_class {
7285 accumulator.class_uses_class.insert((derived, base));
7286 }
7287}
7288
7289fn apply_go_call_graph_to_batch(accumulator: &mut BatchAccumulator, edges: GoCallGraphEdges) {
7290 for (caller, callee) in edges.calls_function {
7291 accumulator.add_calls_function(caller, callee);
7292 }
7293 for (fn_fqn, struct_fqn) in edges.uses_class {
7294 accumulator.add_uses_class(fn_fqn, struct_fqn);
7295 }
7296 for (derived, base) in edges.class_uses_class {
7297 accumulator.add_class_uses_class(derived, base);
7298 }
7299}
7300
/// Persists the structure of one parsed Go file into the graph.
///
/// In order, this:
/// 1. reports parse warnings (capped by `persistence.max_parse_warnings_per_file`),
/// 2. upserts a `Class` node per struct/interface and a `Function` node per
///    function/method, linking them to the file (and methods to their receiver class),
/// 3. upserts an `ApiEndpoint` node per detected HTTP route and links it to handlers
///    whose bare name matches,
/// 4. upserts an `ExternalApi` node per external URL found in the source and queues a
///    calls-external-API edge for every function of the file,
/// 5. queues call / type-use / embedding edges in `accumulator`,
/// 6. writes `DEPENDS_ON_FILE` edges for imports that resolve to a known scanned file.
///
/// Node queries are executed immediately against `graph`; edges handed to `accumulator`
/// are not written here.
///
/// # Errors
/// Returns the first error produced by `graph.run`; work done before it is not undone.
async fn persist_go_structure(
    graph: &Graph,
    file_path: &str,
    file: &ParsedFile,
    source: &str,
    known_paths: &HashSet<String>,
    project_name: Option<String>,
    accumulator: &mut BatchAccumulator,
    scan_root: &Path,
    go_modules: &[GoModule],
    go_replaces: &[GoReplace],
    persistence: &GraphPersistenceOptions,
    compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
    let language = file.language.to_string();
    let package_name = extract_go_package(source);

    emit_limited_parse_warnings(
        "Go",
        file_path,
        extract_go_parse_warnings(&file.tree, source),
        persistence.max_parse_warnings_per_file,
    );

    let (structs, functions) = extract_go_symbols(&file.tree, source, package_name.as_deref());
    // Span maps are looked up by the same FQNs that `extract_go_symbols` produced.
    let class_spans = extract_go_class_spans(&file.tree, source, package_name.as_deref());
    let function_spans = extract_go_function_body_spans(&file.tree, source, package_name.as_deref());

    // --- Classes (structs and interfaces) ---
    for strct in &structs {
        let class_kind = strct.kind.map(|k| k.to_string());
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            class_spans.get(&strct.fqn).copied(),
            LanguageId::Go,
        )
        .await;
        // The leading MATCH means nothing is written unless the File node already exists.
        // On re-ingest, `coalesce` keeps the stored kind/code when the new value is null.
        let q = query(
            "
            MATCH (f:File { path: $path })
            MERGE (c:Class { fqn: $class_fqn })
            ON CREATE SET c.name = $class_name,
                          c.path = $path,
                          c.project_name = $project_name,
                          c.language = $language,
                          c.kind = $class_kind,
                          c.code_bytes = $code_bytes
            ON MATCH SET c.name = $class_name,
                         c.path = $path,
                         c.project_name = $project_name,
                         c.language = $language,
                         c.kind = coalesce($class_kind, c.kind),
                         c.code_bytes = coalesce($code_bytes, c.code_bytes)
            MERGE (f)-[:DECLARES_CLASS]->(c)
            ",
        )
        .param("path", file_path.to_string())
        .param("class_fqn", strct.fqn.clone())
        .param("class_name", strct.name.clone())
        .param("project_name", project_name.clone())
        .param("language", language.clone())
        .param("class_kind", class_kind)
        .param(props::CODE_BYTES, code_bytes);

        graph.run(q).await?;
    }

    // --- Functions and methods ---
    for func in &functions {
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            function_spans.get(&func.fqn).copied(),
            LanguageId::Go,
        )
        .await;
        match &func.class_fqn {
            // Method: also MERGE the receiver class (it may be declared in another file)
            // and link class -> function.
            Some(class_fqn) => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (cls:Class { fqn: $class_fqn })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                                  fn.path = $path,
                                  fn.project_name = $project_name,
                                  fn.language = $language,
                                  fn.is_pointer_receiver = $is_pointer_receiver,
                                  fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                                 fn.path = $path,
                                 fn.project_name = $project_name,
                                 fn.language = $language,
                                 fn.is_pointer_receiver = coalesce($is_pointer_receiver, fn.is_pointer_receiver),
                                 fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("class_fqn", class_fqn.clone())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("language", language.clone())
                .param("is_pointer_receiver", func.is_pointer_receiver)
                .param(props::CODE_BYTES, code_bytes.clone());

                graph.run(q).await?;
            }
            // Free function: only the file -> function link is written.
            None => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                                  fn.path = $path,
                                  fn.project_name = $project_name,
                                  fn.language = $language,
                                  fn.is_pointer_receiver = $is_pointer_receiver,
                                  fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                                 fn.path = $path,
                                 fn.project_name = $project_name,
                                 fn.language = $language,
                                 fn.is_pointer_receiver = coalesce($is_pointer_receiver, fn.is_pointer_receiver),
                                 fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("language", language.clone())
                .param("is_pointer_receiver", func.is_pointer_receiver)
                .param(props::CODE_BYTES, code_bytes);

                graph.run(q).await?;
            }
        }
    }

    // --- HTTP endpoints served by this file ---
    let endpoints = extract_go_http_endpoints(source);
    for (methods, path_template, handler_name) in endpoints {
        let norm_path = normalize_api_path(&path_template);

        // Endpoints are keyed by the raw path template; `methods` is overwritten on
        // every match, the other properties are only filled in when still unset.
        let api_query = query(
            "
            MERGE (api:ApiEndpoint { path: $path })
            ON CREATE SET api.methods = $methods,
                          api.protocol = 'http',
                          api.framework = 'go-http',
                          api.project_name = $project_name,
                          api.norm_path = $norm_path
            ON MATCH SET api.methods = $methods,
                         api.protocol = coalesce(api.protocol, 'http'),
                         api.framework = coalesce(api.framework, 'go-http'),
                         api.project_name = coalesce(api.project_name, $project_name),
                         api.norm_path = coalesce(api.norm_path, $norm_path)
            ",
        )
        .param("path", path_template.clone())
        .param("methods", methods.clone())
        .param("project_name", project_name.clone())
        .param("norm_path", norm_path.clone());

        graph.run(api_query).await?;

        // Handlers are matched by bare function name within this file, so every
        // function or method with that name gets a HANDLED_BY edge.
        for func in &functions {
            if func.name == handler_name {
                let rel_query = query(
                    "
                    MERGE (fn:Function { fqn: $fn_fqn })
                    MERGE (api:ApiEndpoint { path: $path })
                    MERGE (api)-[:HANDLED_BY]->(fn)
                    ",
                )
                .param("fn_fqn", func.fqn.clone())
                .param("path", path_template.clone());

                graph.run(rel_query).await?;
            }
        }
    }

    // --- External HTTP APIs referenced by URL literals ---
    let external_urls = extract_external_http_urls(source);
    for full_url in external_urls {
        let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
        // URLs without an explicit scheme are treated as plain http.
        let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
        let base_url = format!("{protocol}://{host}");
        // The host doubles as both display name and provider.
        let name = host.clone();
        let norm_path = normalize_api_path(&path);

        let ext_query = query(
            "
            MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
            ON CREATE SET ext.name = $name,
                          ext.path = $path,
                          ext.protocol = $protocol,
                          ext.provider = $provider,
                          ext.project_name = $project_name
            ON MATCH SET ext.name = coalesce(ext.name, $name),
                         ext.path = coalesce(ext.path, $path),
                         ext.protocol = coalesce(ext.protocol, $protocol),
                         ext.provider = coalesce(ext.provider, $provider),
                         ext.project_name = coalesce(ext.project_name, $project_name)
            ",
        )
        .param("name", name.clone())
        .param("base_url", base_url.clone())
        .param("path", path.clone())
        .param("norm_path", norm_path.clone())
        .param("protocol", protocol.clone())
        .param("provider", name.clone())
        .param("project_name", project_name.clone());

        graph.run(ext_query).await?;

        // The URL is not attributed to a specific function: every function declared in
        // this file is recorded as a caller of the external API.
        for func in &functions {
            accumulator.add_calls_external_api(
                func.fqn.clone(),
                base_url.clone(),
                norm_path.clone(),
            );
        }
    }

    // --- Call / type-use / embedding edges (queued in the accumulator) ---
    apply_go_call_graph_to_batch(
        accumulator,
        collect_go_call_graph_edges(&file.tree, source, package_name.as_deref()),
    );

    // --- File-level dependencies from imports ---
    for imp in extract_go_imports(&file.tree, source) {
        if let Some(dep_path) =
            resolve_go_import_to_known_go_file(
                &imp,
                known_paths,
                go_modules,
                go_replaces,
                Some(scan_root),
            )
        {
            let dep_query = query(
                "
                MERGE (src:File { path: $src_path })
                MERGE (dst:File { path: $dst_path })
                MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
                ",
            )
            .param("src_path", file_path.to_string())
            .param("dst_path", dep_path.clone());

            graph.run(dep_query).await?;
        } else if should_log_unresolved_import(
            persistence.verbose_imports,
            is_go_stdlib_import(&imp),
            is_likely_third_party_go_import(&imp),
        ) {
            // Whether an unresolved import is reported is decided by
            // `should_log_unresolved_import` from the verbosity flag and the
            // stdlib / third-party classification.
            println!("Go import (unresolved to scanned files): `{}` in {}", imp, file_path);
        }
    }

    Ok(())
}
7576
7577fn extract_go_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
7579 let mut out = Vec::new();
7580 let root = tree.root_node();
7581 walk_tree(root, |node| {
7582 if node.kind() == "ERROR" || node.is_missing() {
7583 let pos = node.start_position();
7584 let s = node.start_byte() as usize;
7585 let e = (node.end_byte() as usize).min(source.len());
7586 let snippet = if s < e {
7587 source[s..e]
7588 .chars()
7589 .take(120)
7590 .collect::<String>()
7591 .replace('\n', " ")
7592 } else {
7593 String::new()
7594 };
7595 out.push((pos.row + 1, pos.column + 1, snippet));
7596 }
7597 });
7598 out
7599}
7600
7601fn go_go_callee_fqn_from_call_expression(
7602 call: Node,
7603 source: &str,
7604 package: Option<&str>,
7605) -> Option<String> {
7606 let func_node = call.child_by_field_name("function")?;
7607 let s = func_node.start_byte() as usize;
7608 let e = (func_node.end_byte() as usize).min(source.len());
7609 if s >= e {
7610 return None;
7611 }
7612 let callee_name = source[s..e].trim().to_string();
7613 if callee_name.is_empty() {
7614 return None;
7615 }
7616 Some(if callee_name.contains('.') {
7617 callee_name
7618 } else if let Some(pkg) = package {
7619 format!("{pkg}.{callee_name}")
7620 } else {
7621 callee_name
7622 })
7623}
7624
7625fn go_decl_fqn_from_function_declaration(
7626 decl: Node,
7627 source: &str,
7628 package: Option<&str>,
7629) -> Option<String> {
7630 let name_node = decl.child_by_field_name("name")?;
7631 let s = name_node.start_byte() as usize;
7632 let e = (name_node.end_byte() as usize).min(source.len());
7633 if s >= e {
7634 return None;
7635 }
7636 let name = source[s..e].to_string();
7637 Some(match package {
7638 Some(pkg) => format!("{pkg}.{name}"),
7639 None => name,
7640 })
7641}
7642
7643fn go_decl_fqn_from_method_declaration(
7644 decl: Node,
7645 source: &str,
7646 package: Option<&str>,
7647) -> Option<String> {
7648 let name_node = decl.child_by_field_name("name")?;
7649 let s = name_node.start_byte() as usize;
7650 let e = (name_node.end_byte() as usize).min(source.len());
7651 if s >= e {
7652 return None;
7653 }
7654 let method_name = source[s..e].to_string();
7655 let receiver_type = decl.child_by_field_name("receiver").and_then(|r| {
7656 let mut type_name = None;
7657 walk_tree(r, |n| {
7658 if n.kind() == "type_identifier" {
7659 let s = n.start_byte() as usize;
7660 let e = (n.end_byte() as usize).min(source.len());
7661 if e <= source.len() && s < e {
7662 type_name = Some(source[s..e].to_string());
7663 }
7664 }
7665 });
7666 type_name
7667 })?;
7668 let class_fqn = match package {
7669 Some(pkg) => format!("{pkg}.{receiver_type}"),
7670 None => receiver_type,
7671 };
7672 Some(format!("{class_fqn}.{method_name}"))
7673}
7674
7675fn go_enclosing_func_decl_fqn_from_inner(node: Node, source: &str, package: Option<&str>) -> Option<String> {
7676 let mut cur = node.parent();
7677 while let Some(n) = cur {
7678 match n.kind() {
7679 "function_declaration" => return go_decl_fqn_from_function_declaration(n, source, package),
7680 "method_declaration" => return go_decl_fqn_from_method_declaration(n, source, package),
7681 _ => cur = n.parent(),
7682 }
7683 }
7684 None
7685}
7686
7687fn extract_go_goroutine_calls(
7689 tree: &Tree,
7690 source: &str,
7691 package: Option<&str>,
7692) -> Vec<(String, String)> {
7693 let mut out = Vec::new();
7694 let root = tree.root_node();
7695 walk_tree(root, |node| {
7696 if node.kind() != "go_statement" {
7697 return;
7698 }
7699 let Some(caller) = go_enclosing_func_decl_fqn_from_inner(node, source, package) else {
7700 return;
7701 };
7702 let mut i = 0usize;
7703 while let Some(expr) = node.named_child(i) {
7704 i += 1;
7705 if expr.kind() == "call_expression" {
7706 if let Some(callee) = go_go_callee_fqn_from_call_expression(expr, source, package) {
7707 out.push((caller.clone(), callee));
7708 }
7709 break;
7710 }
7711 }
7712 });
7713 out
7714}
7715
7716fn go_type_leaf_for_embedding(mut n: Node) -> Node {
7717 loop {
7718 if n.kind() == "pointer_type" {
7719 if let Some(inner) = n.named_child(0) {
7720 n = inner;
7721 continue;
7722 }
7723 }
7724 return n;
7725 }
7726}
7727
7728fn go_embedding_type_fqn(type_node: Node, source: &str, package: Option<&str>) -> Option<String> {
7729 let leaf = go_type_leaf_for_embedding(type_node);
7730 let s = leaf.start_byte() as usize;
7731 let e = (leaf.end_byte() as usize).min(source.len());
7732 if s >= e {
7733 return None;
7734 }
7735 let raw = source[s..e].trim().to_string();
7736 if raw.is_empty() {
7737 return None;
7738 }
7739 Some(if raw.contains('.') {
7740 raw
7741 } else if let Some(pkg) = package {
7742 format!("{pkg}.{raw}")
7743 } else {
7744 raw
7745 })
7746}
7747
7748fn extract_go_embedding(
7750 tree: &Tree,
7751 source: &str,
7752 package: Option<&str>,
7753) -> Vec<(String, String)> {
7754 let mut out = Vec::new();
7755 let root = tree.root_node();
7756 walk_tree(root, |node| {
7757 if node.kind() != "type_declaration" {
7758 return;
7759 }
7760 let mut i = 0usize;
7761 while let Some(child) = node.child(i) {
7762 i += 1;
7763 if child.kind() != "type_spec" {
7764 continue;
7765 }
7766 let Some(type_n) = child.child_by_field_name("type") else {
7767 continue;
7768 };
7769 if type_n.kind() != "struct_type" {
7770 continue;
7771 }
7772 let Some(name_node) = child.child_by_field_name("name") else {
7773 continue;
7774 };
7775 let s = name_node.start_byte() as usize;
7776 let e = (name_node.end_byte() as usize).min(source.len());
7777 if s >= e {
7778 continue;
7779 }
7780 let struct_name = source[s..e].to_string();
7781 let struct_fqn = match package {
7782 Some(pkg) => format!("{pkg}.{struct_name}"),
7783 None => struct_name,
7784 };
7785 walk_tree(type_n, |fd| {
7786 if fd.kind() != "field_declaration" {
7787 return;
7788 }
7789 if fd.child_by_field_name("name").is_some() {
7790 return;
7791 }
7792 let Some(ty) = fd.child_by_field_name("type") else {
7793 return;
7794 };
7795 if let Some(emb) = go_embedding_type_fqn(ty, source, package) {
7796 out.push((struct_fqn.clone(), emb));
7797 }
7798 });
7799 }
7800 });
7801 out
7802}
7803
7804fn extract_go_imports(tree: &Tree, source: &str) -> Vec<String> {
7806 let mut out = Vec::new();
7807 let root = tree.root_node();
7808 walk_tree(root, |node| {
7809 if node.kind() != "import_spec" {
7810 return;
7811 }
7812 let Some(path_node) = node.child_by_field_name("path") else {
7813 return;
7814 };
7815 let s = path_node.start_byte() as usize;
7816 let e = (path_node.end_byte() as usize).min(source.len());
7817 if s >= e {
7818 return;
7819 }
7820 let raw = source[s..e].trim();
7821 let path = raw.trim_matches('`').trim_matches('"').to_string();
7822 if !path.is_empty() {
7823 out.push(path);
7824 }
7825 });
7826 out
7827}
7828
/// Returns the package name from the first `package <name>` line of a Go source file.
///
/// The keyword must be followed by whitespace (space or tab). Only the identifier itself
/// is returned, so a trailing comment or semicolon (for example
/// `package foo // import "example.com/foo"`) is not part of the name. Lines where no
/// identifier follows the keyword are skipped. Returns `None` when no such line exists.
///
/// This is a line-based scan, not a parse: a line inside a block comment that starts
/// with `package ` would also match.
fn extract_go_package(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("package")?;
        // Reject identifiers that merely start with "package" (e.g. `packages`).
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        // Go identifiers are letters, digits and underscores; stop at anything else.
        let name: String = rest
            .trim_start()
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect();
        (!name.is_empty()).then_some(name)
    })
}
7842
7843fn extract_go_symbols(
7845 tree: &Tree,
7846 source: &str,
7847 package: Option<&str>,
7848) -> (Vec<ClassSymbol>, Vec<FunctionSymbol>) {
7849 let mut structs: Vec<ClassSymbol> = Vec::new();
7850 let mut functions: Vec<FunctionSymbol> = Vec::new();
7851 let mut seen_struct_names: HashSet<String> = HashSet::new();
7852
7853 let root = tree.root_node();
7854 walk_tree(root, |node| {
7855 let kind = node.kind();
7856 match kind {
7857 "type_declaration" => {
7858 let mut i = 0usize;
7859 while let Some(child) = node.child(i) {
7860 i += 1;
7861 if child.kind() != "type_spec" {
7862 continue;
7863 }
7864 let Some(type_n) = child.child_by_field_name("type") else {
7865 continue;
7866 };
7867 let kind_label: Option<&'static str> = if type_n.kind() == "struct_type" {
7868 Some("struct")
7869 } else if type_n.kind() == "interface_type" {
7870 Some("interface")
7871 } else {
7872 continue;
7873 };
7874 let Some(name_node) = child.child_by_field_name("name") else {
7875 continue;
7876 };
7877 let start = name_node.start_byte() as usize;
7878 let end = (name_node.end_byte() as usize).min(source.len());
7879 if start >= end {
7880 continue;
7881 }
7882 let name = source[start..end].to_string();
7883 let fqn = if let Some(pkg) = package {
7884 format!("{pkg}.{name}")
7885 } else {
7886 name.clone()
7887 };
7888 if seen_struct_names.insert(fqn.clone()) {
7889 structs.push(ClassSymbol {
7890 name,
7891 fqn,
7892 kind: kind_label,
7893 });
7894 }
7895 }
7896 }
7897 "function_declaration" => {
7898 if let Some(name_node) = node.child_by_field_name("name") {
7899 let start = name_node.start_byte() as usize;
7900 let end = (name_node.end_byte() as usize).min(source.len());
7901 if end <= source.len() && start < end {
7902 let name = source[start..end].to_string();
7903 let fqn = if let Some(pkg) = package {
7904 format!("{pkg}.{name}")
7905 } else {
7906 name.clone()
7907 };
7908 functions.push(FunctionSymbol {
7909 name,
7910 fqn,
7911 class_fqn: None,
7912 return_type: None,
7913 param_types: Vec::new(),
7914 param_count: 0,
7915 modifiers: Vec::new(),
7916 is_pointer_receiver: None,
7917 });
7918 }
7919 }
7920 }
7921 "method_declaration" => {
7922 if let Some(name_node) = node.child_by_field_name("name") {
7924 let start = name_node.start_byte() as usize;
7925 let end = (name_node.end_byte() as usize).min(source.len());
7926 if end <= source.len() && start < end {
7927 let method_name = source[start..end].to_string();
7928
7929 let is_pointer_receiver = node
7930 .child_by_field_name("receiver")
7931 .map(|r| go_receiver_has_pointer(r))
7932 .unwrap_or(false);
7933
7934 let receiver_type = node
7936 .child_by_field_name("receiver")
7937 .and_then(|r| {
7938 let mut type_name = None;
7939 walk_tree(r, |n| {
7940 if n.kind() == "type_identifier" {
7941 let s = n.start_byte() as usize;
7942 let e = (n.end_byte() as usize).min(source.len());
7943 if e <= source.len() && s < e {
7944 type_name = Some(source[s..e].to_string());
7945 }
7946 }
7947 });
7948 type_name
7949 });
7950
7951 let class_fqn = receiver_type.map(|t| {
7952 if let Some(pkg) = package {
7953 format!("{pkg}.{t}")
7954 } else {
7955 t
7956 }
7957 });
7958
7959 let fqn = if let Some(ref cls) = class_fqn {
7960 format!("{cls}.{method_name}")
7961 } else if let Some(pkg) = package {
7962 format!("{pkg}.{method_name}")
7963 } else {
7964 method_name.clone()
7965 };
7966
7967 functions.push(FunctionSymbol {
7968 name: method_name,
7969 fqn,
7970 class_fqn,
7971 return_type: None,
7972 param_types: Vec::new(),
7973 param_count: 0,
7974 modifiers: Vec::new(),
7975 is_pointer_receiver: Some(is_pointer_receiver),
7976 });
7977 }
7978 }
7979 }
7980 _ => {}
7981 }
7982 });
7983
7984 (structs, functions)
7985}
7986
7987fn go_receiver_has_pointer(receiver: Node) -> bool {
7988 let mut ptr = false;
7989 walk_tree(receiver, |n| {
7990 if n.kind() == "pointer_type" {
7991 ptr = true;
7992 }
7993 });
7994 ptr
7995}
7996
7997fn extract_go_http_endpoints(source: &str) -> Vec<(Vec<String>, String, String)> {
7999 let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
8000
8001 for line in source.lines() {
8002 let trimmed = line.trim();
8003
8004 if trimmed.contains("HandleFunc(") || trimmed.contains("Handle(") {
8006 if let Some((path, handler)) = extract_go_handler_call(trimmed) {
8007 endpoints.push((vec!["ANY".to_string()], path, handler));
8008 }
8009 }
8010
8011 for (pattern, method) in [
8013 (".Get(", "GET"),
8014 (".Post(", "POST"),
8015 (".Put(", "PUT"),
8016 (".Delete(", "DELETE"),
8017 (".Patch(", "PATCH"),
8018 ] {
8019 if trimmed.contains(pattern) {
8020 if let Some((path, handler)) = extract_go_handler_call(trimmed) {
8021 endpoints.push((vec![method.to_string()], path, handler));
8022 }
8023 }
8024 }
8025
8026 for (pattern, method) in [
8028 (".GET(", "GET"),
8029 (".POST(", "POST"),
8030 (".PUT(", "PUT"),
8031 (".DELETE(", "DELETE"),
8032 (".PATCH(", "PATCH"),
8033 ] {
8034 if trimmed.contains(pattern) {
8035 if let Some((path, handler)) = extract_go_handler_call(trimmed) {
8036 endpoints.push((vec![method.to_string()], path, handler));
8037 }
8038 }
8039 }
8040
8041 }
8044
8045 endpoints
8046}
8047
/// Pulls `(path, handler)` out of a route-registration line such as
/// `r.Get("/users", listUsers)`.
///
/// The path is the first double-quoted string. The handler is the text after the next
/// comma, cut at the first `)`, `(` or `,` and trimmed. Returns `None` when any piece is
/// missing or empty.
fn extract_go_handler_call(line: &str) -> Option<(String, String)> {
    let (_, after_open_quote) = line.split_once('"')?;
    let (path, after_path) = after_open_quote.split_once('"')?;
    let (_, handler_part) = after_path.split_once(',')?;

    let handler = handler_part.split([')', '(', ',']).next()?.trim();

    if path.is_empty() || handler.is_empty() {
        None
    } else {
        Some((path.to_string(), handler.to_string()))
    }
}
8074
8075fn extract_go_used_types(
8077 tree: &Tree,
8078 source: &str,
8079 package: Option<&str>,
8080) -> Vec<(String, String)> {
8081 let mut uses: Vec<(String, String)> = Vec::new();
8082 let root = tree.root_node();
8083
8084 walk_tree(root, |node| {
8085 let kind = node.kind();
8086 if kind != "function_declaration" && kind != "method_declaration" {
8087 return;
8088 }
8089
8090 let func_name = node
8091 .child_by_field_name("name")
8092 .and_then(|n| {
8093 let s = n.start_byte() as usize;
8094 let e = n.end_byte() as usize;
8095 if e <= source.len() && s < e {
8096 Some(source[s..e].to_string())
8097 } else {
8098 None
8099 }
8100 });
8101
8102 let func_name = match func_name {
8103 Some(n) => n,
8104 None => return,
8105 };
8106
8107 let func_fqn = if let Some(pkg) = package {
8108 format!("{pkg}.{func_name}")
8109 } else {
8110 func_name
8111 };
8112
8113 walk_tree(node, |child| {
8115 if child.kind() == "type_identifier" {
8116 let s = child.start_byte() as usize;
8117 let e = child.end_byte() as usize;
8118 if e <= source.len() && s < e {
8119 let type_name = source[s..e].to_string();
8120 if !is_go_builtin_type(&type_name) {
8122 let type_fqn = if let Some(pkg) = package {
8123 format!("{pkg}.{type_name}")
8124 } else {
8125 type_name
8126 };
8127 uses.push((func_fqn.clone(), type_fqn));
8128 }
8129 }
8130 }
8131 });
8132 });
8133
8134 uses
8135}
8136
/// Reports whether `name` is one of Go's predeclared type identifiers, which must not be
/// recorded as a used project type.
///
/// Covers the full predeclared set, including `any` and `comparable` (Go 1.18+).
fn is_go_builtin_type(name: &str) -> bool {
    matches!(
        name,
        "int" | "int8" | "int16" | "int32" | "int64"
            | "uint" | "uint8" | "uint16" | "uint32" | "uint64" | "uintptr"
            | "float32" | "float64" | "complex64" | "complex128"
            | "bool" | "string" | "byte" | "rune" | "error"
            | "any" | "comparable"
    )
}
8147
8148fn extract_go_calls(
8150 tree: &Tree,
8151 source: &str,
8152 package: Option<&str>,
8153) -> Vec<(String, String)> {
8154 let mut calls: Vec<(String, String)> = Vec::new();
8155 let root = tree.root_node();
8156
8157 walk_tree(root, |node| {
8158 let kind = node.kind();
8159 if kind != "function_declaration" && kind != "method_declaration" {
8160 return;
8161 }
8162
8163 let func_name = node
8164 .child_by_field_name("name")
8165 .and_then(|n| {
8166 let s = n.start_byte() as usize;
8167 let e = n.end_byte() as usize;
8168 if e <= source.len() && s < e {
8169 Some(source[s..e].to_string())
8170 } else {
8171 None
8172 }
8173 });
8174
8175 let func_name = match func_name {
8176 Some(n) => n,
8177 None => return,
8178 };
8179
8180 let caller_fqn = if let Some(pkg) = package {
8181 format!("{pkg}.{func_name}")
8182 } else {
8183 func_name
8184 };
8185
8186 walk_tree(node, |child| {
8188 if child.kind() == "call_expression" {
8189 if let Some(func_node) = child.child_by_field_name("function") {
8190 let s = func_node.start_byte() as usize;
8191 let e = func_node.end_byte() as usize;
8192 if e <= source.len() && s < e {
8193 let callee_name = source[s..e].to_string();
8194 let callee_fqn = if callee_name.contains('.') {
8196 callee_name
8198 } else if let Some(pkg) = package {
8199 format!("{pkg}.{callee_name}")
8200 } else {
8201 callee_name
8202 };
8203 calls.push((caller_fqn.clone(), callee_fqn));
8204 }
8205 }
8206 }
8207 });
8208 });
8209
8210 calls
8211}
8212
8213#[derive(Debug, Clone)]
8215pub struct ExtractOptions {
8216 pub verbose_imports: bool,
8217 pub max_parse_warnings_per_file: usize,
8218 pub compressor: CompressorConfig,
8220}
8221
8222impl Default for ExtractOptions {
8223 fn default() -> Self {
8224 Self {
8225 verbose_imports: false,
8226 max_parse_warnings_per_file: 50,
8227 compressor: CompressorConfig::default(),
8228 }
8229 }
8230}
8231
8232pub fn function_body_spans_for_file(
8234 file: &ParsedFile,
8235 file_path: &str,
8236 source: &str,
8237) -> HashMap<String, (usize, usize)> {
8238 match file.language {
8239 LanguageId::Java => {
8240 let package = extract_java_package(source);
8241 extract_java_method_body_spans(&file.tree, source, package.as_deref())
8242 }
8243 LanguageId::CSharp => {
8244 let namespace = extract_csharp_namespace(&file.tree, source);
8245 extract_csharp_method_body_spans_map(&file.tree, source, namespace.as_deref())
8246 }
8247 LanguageId::Go => {
8248 let package = extract_go_package(source);
8249 extract_go_function_body_spans(&file.tree, source, package.as_deref())
8250 }
8251 LanguageId::Erlang => {
8252 let module = resolve_erlang_module_name(&file.path, &file.tree, source);
8253 module
8254 .as_ref()
8255 .map(|m| extract_erlang_function_spans(m, &file.tree, source))
8256 .unwrap_or_default()
8257 }
8258 _ => extract_non_java_function_body_spans(file, source, file_path),
8259 }
8260}
8261
8262pub fn class_body_spans_for_file(
8264 file: &ParsedFile,
8265 source: &str,
8266) -> HashMap<String, (usize, usize)> {
8267 match file.language {
8268 LanguageId::Go => {
8269 let package = extract_go_package(source);
8270 extract_go_class_spans(&file.tree, source, package.as_deref())
8271 }
8272 LanguageId::CSharp => extract_csharp_class_spans(&file.tree, source),
8273 _ => HashMap::new(),
8274 }
8275}
8276
8277pub fn property_body_spans_for_file(
8279 file: &ParsedFile,
8280 source: &str,
8281) -> HashMap<String, (usize, usize)> {
8282 match file.language {
8283 LanguageId::CSharp => extract_csharp_property_spans(&file.tree, source),
8284 _ => HashMap::new(),
8285 }
8286}
8287
8288pub async fn enrich_project_ir_code_bytes(
8290 ir: &mut ProjectIr,
8291 root: &Path,
8292 files: &[ParsedFile],
8293 config: &CompressorConfig,
8294) -> Result<(), crate::compress::CompressError> {
8295 if !config.enabled {
8296 return Ok(());
8297 }
8298 let client = match CompressorClient::from_config(config) {
8299 Ok(c) => c,
8300 Err(e) => {
8301 eprintln!("RedCompressor: failed to create client ({e}); skipping code_bytes");
8302 return Ok(());
8303 }
8304 };
8305 if let Err(e) = client.health_check().await {
8306 eprintln!("RedCompressor: health check failed ({e}); compression may be unavailable");
8307 }
8308
8309 let mut by_path: HashMap<String, &ParsedFile> = HashMap::new();
8310 for file in files {
8311 by_path.insert(neo4j_path_string(root, &file.path), file);
8312 }
8313
8314 for func in &mut ir.functions {
8315 let Some(file) = by_path.get(&func.path) else {
8316 continue;
8317 };
8318 let spans = function_body_spans_for_file(file, &func.path, &file.source);
8319 let Some(span) = spans.get(&func.fqn).copied() else {
8320 continue;
8321 };
8322 func.code_bytes = code_bytes_for_span(
8323 Some(&client),
8324 &file.source,
8325 Some(span),
8326 file.language,
8327 )
8328 .await;
8329 }
8330
8331 for module in &mut ir.modules {
8332 let Some(file) = by_path.get(&module.path) else {
8333 continue;
8334 };
8335 module.code_bytes =
8336 compress_full_source(&file.source, LanguageId::Erlang, &client).await;
8337 }
8338
8339 for class in &mut ir.classes {
8340 let Some(file) = by_path.get(&class.path) else {
8341 continue;
8342 };
8343 let spans = class_body_spans_for_file(file, &file.source);
8344 let Some(span) = spans.get(&class.fqn).copied() else {
8345 continue;
8346 };
8347 class.code_bytes = code_bytes_for_span(
8348 Some(&client),
8349 &file.source,
8350 Some(span),
8351 file.language,
8352 )
8353 .await;
8354 }
8355
8356 Ok(())
8357}
8358
8359#[derive(Debug, Default)]
8360struct IrEdgeAccumulator {
8361 calls_function: HashSet<(String, String)>,
8362 uses_class: HashSet<(String, String)>,
8363 class_uses_class: HashSet<(String, String)>,
8364}
8365
8366impl IrEdgeAccumulator {
8367 fn flush_into(&self, edges: &mut Vec<EdgeIr>) {
8368 for (caller, callee) in &self.calls_function {
8369 edges.push(EdgeIr {
8370 kind: EdgeKind::CallsFunction,
8371 from_label: "Function".into(),
8372 from_key: caller.clone(),
8373 to_label: "Function".into(),
8374 to_key: callee.clone(),
8375 });
8376 }
8377 for (fn_fqn, cls_fqn) in &self.uses_class {
8378 edges.push(EdgeIr {
8379 kind: EdgeKind::UsesClass,
8380 from_label: "Function".into(),
8381 from_key: fn_fqn.clone(),
8382 to_label: "Class".into(),
8383 to_key: cls_fqn.clone(),
8384 });
8385 }
8386 for (derived, base) in &self.class_uses_class {
8387 edges.push(EdgeIr {
8388 kind: EdgeKind::ClassUsesClass,
8389 from_label: "Class".into(),
8390 from_key: derived.clone(),
8391 to_label: "Class".into(),
8392 to_key: base.clone(),
8393 });
8394 }
8395 }
8396}
8397
8398fn push_depends_on_file(edges: &mut Vec<EdgeIr>, src: &str, dst: &str) {
8399 edges.push(EdgeIr {
8400 kind: EdgeKind::DependsOnFile,
8401 from_label: "File".into(),
8402 from_key: src.to_string(),
8403 to_label: "File".into(),
8404 to_key: dst.to_string(),
8405 });
8406}
8407
8408fn push_declares_function(edges: &mut Vec<EdgeIr>, from_label: &str, from_key: &str, fqn: &str) {
8409 edges.push(EdgeIr {
8410 kind: EdgeKind::DeclaresFunction,
8411 from_label: from_label.into(),
8412 from_key: from_key.to_string(),
8413 to_label: "Function".into(),
8414 to_key: fqn.to_string(),
8415 });
8416}
8417
8418pub fn build_project_ir(
8420 root: &Path,
8421 files: &[ParsedFile],
8422 _options: &ExtractOptions,
8423) -> ProjectIr {
8424 use crate::ir::{ApiEndpointIr, FileIr};
8425
8426 let mut ir = ProjectIr::empty();
8427 let mut accumulator = IrEdgeAccumulator::default();
8428
8429 let known_paths: HashSet<String> = files
8430 .iter()
8431 .map(|f| neo4j_path_string(root, &f.path))
8432 .collect();
8433
8434 let go_modules = discover_go_modules(root, false).unwrap_or_default();
8435 let go_replaces = discover_go_replaces(root, false).unwrap_or_default();
8436 let csharp_batch_index = build_csharp_batch_index(files, root);
8437 let erlang_module_index = build_erlang_module_index(files);
8438
8439 for file in files {
8440 let path = neo4j_path_string(root, &file.path);
8441 let language = file.language.to_string();
8442 let project_name = derive_project_name(&file.path, root);
8443 let source = &file.source;
8444
8445 ir.files.push(FileIr {
8446 path: path.clone(),
8447 language: language.clone(),
8448 framework: None,
8449 project_name: project_name.clone(),
8450 });
8451
8452 match file.language {
8453 LanguageId::Java => {
8454 append_java_class_ir(&mut ir, &path, project_name.clone(), &file.tree, source);
8455 let package = extract_java_package(source);
8456 let (_, methods) =
8457 extract_java_symbols(&file.tree, source, package.as_deref());
8458 for func in &methods {
8459 ir.functions.push(FunctionIr {
8460 name: func.name.clone(),
8461 fqn: func.fqn.clone(),
8462 path: path.clone(),
8463 language: language.clone(),
8464 framework: None,
8465 project_name: project_name.clone(),
8466 arity: None,
8467 return_type: func.return_type.clone(),
8468 param_count: Some(func.param_count as u32),
8469 param_types: func.param_types.clone(),
8470 code_bytes: None,
8471 });
8472 if let Some(class_fqn) = &func.class_fqn {
8473 push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
8474 }
8475 push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
8476 }
8477 for (derived, base) in
8478 extract_java_inheritance_edges(&file.tree, source, package.as_deref())
8479 {
8480 accumulator.class_uses_class.insert((derived, base));
8481 }
8482 for (cls, dep) in
8483 extract_java_injected_dependencies(&file.tree, source, package.as_deref())
8484 {
8485 accumulator.class_uses_class.insert((cls, dep));
8486 }
8487 for (caller, callee) in extract_java_calls(&file.tree, source, package.as_deref()) {
8488 accumulator.calls_function.insert((caller, callee));
8489 }
8490 for import_fqn in extract_internal_java_imports(source) {
8491 if let Some(dep_path) = map_import_to_project_path(&path, &import_fqn) {
8492 if known_paths.contains(&dep_path) {
8493 push_depends_on_file(&mut ir.edges, &path, &dep_path);
8494 }
8495 }
8496 }
8497 for (http_methods, path_template, handler_name) in extract_java_spring_endpoints(source)
8498 {
8499 let norm_path = normalize_api_path(&path_template);
8500 ir.api_endpoints.push(ApiEndpointIr {
8501 methods: http_methods.clone(),
8502 path: path_template.clone(),
8503 protocol: Some("http".into()),
8504 framework: Some("spring".into()),
8505 project_name: project_name.clone(),
8506 });
8507 if let Some(handler_fqn) =
8508 resolve_java_handler_fqn(&http_methods, &handler_name, &ir)
8509 {
8510 ir.edges.push(EdgeIr {
8511 kind: EdgeKind::HandlesApi,
8512 from_label: "ApiEndpoint".into(),
8513 from_key: api_endpoint_key(&http_methods, &path_template),
8514 to_label: "Function".into(),
8515 to_key: handler_fqn,
8516 });
8517 }
8518 let _ = norm_path;
8519 }
8520 }
8521 LanguageId::CSharp => {
8522 append_csharp_structural_ir(
8523 &mut ir,
8524 &path,
8525 project_name.clone(),
8526 &file.tree,
8527 source,
8528 );
8529 let namespace = extract_csharp_namespace(&file.tree, source);
8530 let using_summary = extract_csharp_using_summary(&file.tree, source);
8531 let (classes, methods, _) = extract_csharp_symbols(&file.tree, source);
8532 for func in &methods {
8533 ir.functions.push(FunctionIr {
8534 name: func.name.clone(),
8535 fqn: func.fqn.clone(),
8536 path: path.clone(),
8537 language: language.clone(),
8538 framework: None,
8539 project_name: project_name.clone(),
8540 arity: None,
8541 return_type: func.return_type.clone(),
8542 param_count: Some(func.param_count as u32),
8543 param_types: func.param_types.clone(),
8544 code_bytes: None,
8545 });
8546 if let Some(class_fqn) = &func.class_fqn {
8547 push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
8548 }
8549 push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
8550 }
8551 for ns in &using_summary.namespace_imports {
8552 let Some(dep_paths) = csharp_batch_index.namespace_to_paths.get(ns) else {
8553 continue;
8554 };
8555 for dep_path in dep_paths {
8556 if dep_path == &path || !known_paths.contains(dep_path) {
8557 continue;
8558 }
8559 push_depends_on_file(&mut ir.edges, &path, dep_path);
8560 }
8561 }
8562 for (derived, base) in extract_csharp_class_inheritance_edges(
8563 &file.tree,
8564 source,
8565 namespace.as_deref(),
8566 &using_summary,
8567 &csharp_batch_index,
8568 ) {
8569 accumulator.class_uses_class.insert((derived, base));
8570 }
8571 for (fn_fqn, class_fqn) in extract_csharp_used_classes(
8572 &file.tree,
8573 source,
8574 namespace.as_deref(),
8575 &using_summary,
8576 &csharp_batch_index,
8577 ) {
8578 accumulator.uses_class.insert((fn_fqn, class_fqn));
8579 }
8580 for (caller, callee) in extract_csharp_calls(
8581 &file.tree,
8582 source,
8583 namespace.as_deref(),
8584 &using_summary,
8585 &csharp_batch_index,
8586 ) {
8587 accumulator.calls_function.insert((caller, callee));
8588 }
8589 let _ = classes;
8590 }
8591 LanguageId::Erlang => {
8592 extract_erlang_to_ir(
8593 &mut ir,
8594 &mut accumulator,
8595 file,
8596 &path,
8597 source,
8598 project_name.clone(),
8599 &language,
8600 &erlang_module_index,
8601 );
8602 }
8603 LanguageId::Go => {
8604 let package_name = extract_go_package(source);
8605 let (structs, functions) =
8606 extract_go_symbols(&file.tree, source, package_name.as_deref());
8607 for s in &structs {
8608 ir.classes.push(ClassIr {
8609 fqn: s.fqn.clone(),
8610 name: s.name.clone(),
8611 path: path.clone(),
8612 language: language.clone(),
8613 project_name: project_name.clone(),
8614 kind: s.kind.map(str::to_string),
8615 code_bytes: None,
8616 });
8617 ir.edges.push(EdgeIr {
8618 kind: EdgeKind::DeclaresClass,
8619 from_label: "File".into(),
8620 from_key: path.clone(),
8621 to_label: "Class".into(),
8622 to_key: s.fqn.clone(),
8623 });
8624 }
8625 for func in &functions {
8626 ir.functions.push(FunctionIr {
8627 name: func.name.clone(),
8628 fqn: func.fqn.clone(),
8629 path: path.clone(),
8630 language: language.clone(),
8631 framework: None,
8632 project_name: project_name.clone(),
8633 arity: None,
8634 return_type: func.return_type.clone(),
8635 param_count: Some(func.param_count as u32),
8636 param_types: func.param_types.clone(),
8637 code_bytes: None,
8638 });
8639 if let Some(class_fqn) = &func.class_fqn {
8640 push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
8641 }
8642 push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
8643 }
8644 for import_path in extract_go_imports(&file.tree, source) {
8645 if let Some(dep) = resolve_go_import_to_known_go_file(
8646 &import_path,
8647 &known_paths,
8648 &go_modules,
8649 &go_replaces,
8650 Some(root),
8651 ) {
8652 push_depends_on_file(&mut ir.edges, &path, &dep);
8653 }
8654 }
8655 apply_go_call_graph_to_ir(
8656 &mut accumulator,
8657 collect_go_call_graph_edges(&file.tree, source, package_name.as_deref()),
8658 );
8659 }
8660 _ => {
8661 extract_non_java_to_ir(
8662 &mut ir,
8663 &mut accumulator,
8664 file,
8665 &path,
8666 source,
8667 project_name.clone(),
8668 &language,
8669 &known_paths,
8670 _options,
8671 );
8672 }
8673 }
8674 }
8675
8676 accumulator.flush_into(&mut ir.edges);
8677 apply_same_api_edges(&mut ir);
8678 ir
8679}
8680
8681fn resolve_java_handler_fqn(
8682 _methods: &[String],
8683 handler_name: &str,
8684 ir: &ProjectIr,
8685) -> Option<String> {
8686 ir.functions
8687 .iter()
8688 .find(|f| f.name == handler_name)
8689 .map(|f| f.fqn.clone())
8690}
8691
8692fn extract_erlang_to_ir(
8693 ir: &mut ProjectIr,
8694 accumulator: &mut IrEdgeAccumulator,
8695 file: &ParsedFile,
8696 file_path: &str,
8697 source: &str,
8698 project_name: Option<String>,
8699 language: &str,
8700 erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
8701) {
8702 use crate::ir::{ApiEndpointIr, BehaviourIr, CallbackIr, ExternalApiIr, ModuleIr};
8703
8704 let module_name = resolve_erlang_module_name(&file.path, &file.tree, source);
8705 let erlang_meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, source);
8706
8707 if let Some(module_name) = module_name.as_ref() {
8708 ir.modules.push(ModuleIr {
8709 name: module_name.clone(),
8710 path: file_path.to_string(),
8711 language: language.to_string(),
8712 framework: None,
8713 project_name: project_name.clone(),
8714 code_bytes: None,
8715 });
8716 ir.edges.push(EdgeIr {
8717 kind: EdgeKind::DeclaresModule,
8718 from_label: "File".into(),
8719 from_key: file_path.to_string(),
8720 to_label: "Module".into(),
8721 to_key: module_key(module_name, file_path),
8722 });
8723 }
8724
8725 let functions = if let Some(module_name) = module_name.as_ref() {
8726 extract_erlang_functions(module_name, &file.tree, source)
8727 } else {
8728 Vec::new()
8729 };
8730
8731 for (fun_name, arity, fqn) in &functions {
8732 ir.functions.push(FunctionIr {
8733 name: fun_name.clone(),
8734 fqn: fqn.clone(),
8735 path: file_path.to_string(),
8736 language: language.to_string(),
8737 framework: None,
8738 project_name: project_name.clone(),
8739 arity: Some(*arity),
8740 return_type: None,
8741 param_count: None,
8742 param_types: vec![],
8743 code_bytes: None,
8744 });
8745 push_declares_function(&mut ir.edges, "File", file_path, fqn);
8746 if let Some(module_name) = module_name.as_ref() {
8747 push_declares_function(&mut ir.edges, "Module", &module_key(module_name, file_path), fqn);
8748 }
8749 }
8750
8751 let callback_contracts = collect_callback_contracts_for_module(
8752 module_name.as_deref(),
8753 &erlang_meta.behaviour_usages,
8754 &erlang_meta.declared_callbacks,
8755 &erlang_meta.optional_callbacks,
8756 );
8757
8758 if let Some(module_name) = module_name.as_ref() {
8759 for behaviour in &erlang_meta.behaviour_usages {
8760 ir.behaviours.push(BehaviourIr {
8761 name: behaviour.clone(),
8762 path: None,
8763 language: Some(language.to_string()),
8764 project_name: project_name.clone(),
8765 });
8766 ir.edges.push(EdgeIr {
8767 kind: EdgeKind::ImplementsBehaviour,
8768 from_label: "Module".into(),
8769 from_key: module_key(module_name, file_path),
8770 to_label: "Behaviour".into(),
8771 to_key: behaviour.clone(),
8772 });
8773 let dep_path = guess_erlang_file_path_from_module(file_path, behaviour);
8774 push_depends_on_file(&mut ir.edges, file_path, &dep_path);
8775 }
8776
8777 if !erlang_meta.declared_callbacks.is_empty() {
8778 ir.behaviours.push(BehaviourIr {
8779 name: module_name.clone(),
8780 path: Some(file_path.to_string()),
8781 language: Some(language.to_string()),
8782 project_name: project_name.clone(),
8783 });
8784 ir.edges.push(EdgeIr {
8785 kind: EdgeKind::DeclaresBehaviour,
8786 from_label: "File".into(),
8787 from_key: file_path.to_string(),
8788 to_label: "Behaviour".into(),
8789 to_key: module_name.clone(),
8790 });
8791 }
8792
8793 for parent in &erlang_meta.behaviour_extensions {
8794 ir.edges.push(EdgeIr {
8795 kind: EdgeKind::ExtendsBehaviour,
8796 from_label: "Behaviour".into(),
8797 from_key: module_name.clone(),
8798 to_label: "Behaviour".into(),
8799 to_key: parent.clone(),
8800 });
8801 }
8802 }
8803
8804 for contract in &callback_contracts {
8805 let callback_fqn = format!(
8806 "{}:{}/{}",
8807 contract.behaviour, contract.name, contract.arity
8808 );
8809 ir.callbacks.push(CallbackIr {
8810 name: contract.name.clone(),
8811 fqn: callback_fqn.clone(),
8812 arity: contract.arity,
8813 optional: contract.optional,
8814 language: Some(language.to_string()),
8815 project_name: project_name.clone(),
8816 });
8817 ir.edges.push(EdgeIr {
8818 kind: EdgeKind::DeclaresCallback,
8819 from_label: "Behaviour".into(),
8820 from_key: contract.behaviour.clone(),
8821 to_label: "Callback".into(),
8822 to_key: callback_fqn.clone(),
8823 });
8824 }
8825
8826 let function_by_sig: HashMap<(String, u32), String> = functions
8827 .iter()
8828 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
8829 .collect();
8830
8831 for contract in &callback_contracts {
8832 let key = (contract.name.clone(), contract.arity);
8833 let Some(fn_fqn) = function_by_sig.get(&key) else {
8834 continue;
8835 };
8836 let callback_fqn = format!(
8837 "{}:{}/{}",
8838 contract.behaviour, contract.name, contract.arity
8839 );
8840 ir.edges.push(EdgeIr {
8841 kind: EdgeKind::ImplementsCallback,
8842 from_label: "Function".into(),
8843 from_key: fn_fqn.clone(),
8844 to_label: "Callback".into(),
8845 to_key: callback_fqn.clone(),
8846 });
8847 }
8848
8849 for (name, arity) in &erlang_meta.overridden_callbacks {
8850 let key = (name.clone(), *arity);
8851 let Some(fn_fqn) = function_by_sig.get(&key) else {
8852 continue;
8853 };
8854 for contract in callback_contracts
8855 .iter()
8856 .filter(|c| c.name == *name && c.arity == *arity)
8857 {
8858 let callback_fqn = format!(
8859 "{}:{}/{}",
8860 contract.behaviour, contract.name, contract.arity
8861 );
8862 ir.edges.push(EdgeIr {
8863 kind: EdgeKind::OverridesCallback,
8864 from_label: "Function".into(),
8865 from_key: fn_fqn.clone(),
8866 to_label: "Callback".into(),
8867 to_key: callback_fqn,
8868 });
8869 }
8870 }
8871
8872 for (methods, path_template, handler_module) in extract_erlang_api_endpoints(&file.tree, source)
8873 {
8874 let methods_owned: Vec<String> = methods.iter().map(|s| s.to_string()).collect();
8875 ir.api_endpoints.push(ApiEndpointIr {
8876 methods: methods_owned.clone(),
8877 path: path_template.clone(),
8878 protocol: Some("http".into()),
8879 framework: Some("cowboy".into()),
8880 project_name: project_name.clone(),
8881 });
8882 for fqn in select_endpoint_handler_fqns(&handler_module, erlang_module_index) {
8883 ir.edges.push(EdgeIr {
8884 kind: EdgeKind::HandlesApi,
8885 from_label: "ApiEndpoint".into(),
8886 from_key: api_endpoint_key(&methods_owned, &path_template),
8887 to_label: "Function".into(),
8888 to_key: fqn,
8889 });
8890 }
8891 }
8892
8893 for full_url in extract_external_http_urls_from_tree(&file.tree, source) {
8894 let (protocol_opt, host, url_path) = split_url_protocol_host_and_path(&full_url);
8895 let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
8896 let base_url = format!("{protocol}://{host}");
8897 let norm_path = normalize_api_path(&url_path);
8898 ir.external_apis.push(ExternalApiIr {
8899 name: host.clone(),
8900 base_url: Some(base_url.clone()),
8901 protocol: Some(protocol),
8902 provider: Some(host),
8903 service: None,
8904 norm_path: Some(norm_path.clone()),
8905 });
8906 for (_, _, fqn) in &functions {
8907 ir.edges.push(EdgeIr {
8908 kind: EdgeKind::CallsExternalApi,
8909 from_label: "Function".into(),
8910 from_key: fqn.clone(),
8911 to_label: "ExternalApi".into(),
8912 to_key: external_api_key(&base_url, &norm_path),
8913 });
8914 }
8915 }
8916
8917 if let Some(module_name) = module_name.as_ref() {
8918 let function_by_sig: HashMap<(String, u32), String> = functions
8919 .iter()
8920 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
8921 .collect();
8922 for (caller, callee) in extract_erlang_call_edges(
8923 &file.tree,
8924 source,
8925 Some(module_name.as_str()),
8926 &function_by_sig,
8927 ) {
8928 accumulator.calls_function.insert((caller, callee));
8929 }
8930 }
8931}
8932
8933fn extract_non_java_to_ir(
8934 ir: &mut ProjectIr,
8935 accumulator: &mut IrEdgeAccumulator,
8936 file: &ParsedFile,
8937 file_path: &str,
8938 source: &str,
8939 project_name: Option<String>,
8940 language: &str,
8941 known_paths: &HashSet<String>,
8942 options: &ExtractOptions,
8943) {
8944 let functions = extract_non_java_function_symbols(file, source, file_path);
8945 let mut name_to_fqn_depth: HashMap<String, (String, usize)> = HashMap::new();
8946 for f in &functions {
8947 let logical = f
8948 .fqn
8949 .split_once("::")
8950 .map(|(_, l)| l)
8951 .unwrap_or(f.fqn.as_str());
8952 let (short, depth) = non_java_short_name_and_depth(file.language, logical);
8953 name_to_fqn_depth
8954 .entry(short)
8955 .and_modify(|(existing_fqn, existing_depth)| {
8956 if depth > *existing_depth {
8957 *existing_fqn = f.fqn.clone();
8958 *existing_depth = depth;
8959 }
8960 })
8961 .or_insert_with(|| (f.fqn.clone(), depth));
8962 }
8963 let name_to_fqn: HashMap<String, String> = name_to_fqn_depth
8964 .into_iter()
8965 .map(|(k, (v, _))| (k, v))
8966 .collect();
8967
8968 for func in &functions {
8969 ir.functions.push(FunctionIr {
8970 name: func.name.clone(),
8971 fqn: func.fqn.clone(),
8972 path: file_path.to_string(),
8973 language: language.to_string(),
8974 framework: None,
8975 project_name: project_name.clone(),
8976 arity: None,
8977 return_type: func.return_type.clone(),
8978 param_count: Some(func.param_count as u32),
8979 param_types: func.param_types.clone(),
8980 code_bytes: None,
8981 });
8982 push_declares_function(&mut ir.edges, "File", file_path, &func.fqn);
8983 }
8984
8985 match file.language {
8986 LanguageId::Python => {
8987 for imp in extract_python_import_modules(&file.tree, source) {
8988 if let Some(dep) = resolve_python_import_to_known_file(&imp, known_paths) {
8989 push_depends_on_file(&mut ir.edges, file_path, &dep);
8990 } else if should_log_unresolved_import(
8991 options.verbose_imports,
8992 is_python_stdlib_top_level(&imp),
8993 is_python_common_external_top_level(&imp),
8994 ) {
8995 eprintln!(
8996 "Python import (unresolved to scanned files): `{imp}` in {file_path}"
8997 );
8998 }
8999 }
9000 for (caller, callee) in
9001 extract_python_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
9002 {
9003 accumulator.calls_function.insert((caller, callee));
9004 }
9005 }
9006 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
9007 for spec in extract_js_ts_import_specifiers(&file.tree, source) {
9008 if let Some(dep) =
9009 resolve_js_ts_import_to_known_file(&spec, file_path, known_paths)
9010 {
9011 push_depends_on_file(&mut ir.edges, file_path, &dep);
9012 } else if options.verbose_imports {
9013 eprintln!("JS/TS import (unresolved): `{spec}` in {file_path}");
9014 }
9015 }
9016 for (caller, callee) in extract_js_ts_intrafile_calls(
9017 &file.tree,
9018 source,
9019 file_path,
9020 file.language,
9021 &name_to_fqn,
9022 ) {
9023 accumulator.calls_function.insert((caller, callee));
9024 }
9025 }
9026 LanguageId::Rust => {
9027 for use_path in extract_rust_use_paths(&file.tree, source) {
9028 if let Some(dep) =
9029 resolve_rust_use_to_known_file(&use_path, file_path, known_paths)
9030 {
9031 push_depends_on_file(&mut ir.edges, file_path, &dep);
9032 } else if options.verbose_imports {
9033 eprintln!(
9034 "Rust use (unresolved to scanned files): `{}` in {file_path}",
9035 use_path.join("::")
9036 );
9037 }
9038 }
9039 for (caller, callee) in
9040 extract_rust_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
9041 {
9042 accumulator.calls_function.insert((caller, callee));
9043 }
9044 }
9045 _ => {}
9046 }
9047}
9048
9049fn apply_same_api_edges(ir: &mut ProjectIr) {
9050 use crate::ir::{api_endpoint_key, external_api_key};
9051 for ep in &ir.api_endpoints {
9052 let ep_norm = normalize_api_path(&ep.path);
9053 for ext in &ir.external_apis {
9054 if ext.norm_path.as_deref() == Some(ep_norm.as_str()) {
9055 ir.edges.push(EdgeIr {
9056 kind: EdgeKind::SameApi,
9057 from_label: "ApiEndpoint".into(),
9058 from_key: api_endpoint_key(&ep.methods, &ep.path),
9059 to_label: "ExternalApi".into(),
9060 to_key: if let (Some(b), Some(n)) = (&ext.base_url, &ext.norm_path) {
9061 external_api_key(b, n)
9062 } else {
9063 ext.name.clone()
9064 },
9065 });
9066 }
9067 }
9068 }
9069}
9070
9071pub async fn persist_project_ir_to_neo4j(
9076 cfg: &Neo4jConfig,
9077 ir: &ProjectIr,
9078 clean: bool,
9079) -> Result<(), GraphError> {
9080 let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
9081
9082 if clean {
9083 graph.run(query("MATCH (n) DETACH DELETE n")).await?;
9084 }
9085
9086 for f in &ir.files {
9087 let q = query(
9088 "
9089 MERGE (n:File { path: $path })
9090 SET n.language = $language, n.project_name = $project_name
9091 ",
9092 )
9093 .param("path", f.path.clone())
9094 .param("language", f.language.clone())
9095 .param("project_name", f.project_name.clone());
9096 graph.run(q).await?;
9097 }
9098
9099 for m in &ir.modules {
9100 let q = query(
9101 "
9102 MERGE (n:Module { name: $name, path: $path })
9103 ON CREATE SET n.language = $language, n.project_name = $project_name,
9104 n.code_bytes = $code_bytes
9105 ON MATCH SET n.language = $language, n.project_name = $project_name,
9106 n.code_bytes = coalesce($code_bytes, n.code_bytes)
9107 ",
9108 )
9109 .param("name", m.name.clone())
9110 .param("path", m.path.clone())
9111 .param("language", m.language.clone())
9112 .param("project_name", m.project_name.clone())
9113 .param(props::CODE_BYTES, m.code_bytes.clone());
9114 graph.run(q).await?;
9115 }
9116
9117 for c in &ir.classes {
9118 let q = query(
9119 "
9120 MERGE (n:Class { fqn: $fqn })
9121 ON CREATE SET n.name = $name, n.path = $path, n.language = $language,
9122 n.project_name = $project_name, n.kind = $kind,
9123 n.code_bytes = $code_bytes
9124 ON MATCH SET n.name = $name, n.path = $path, n.language = $language,
9125 n.project_name = $project_name, n.kind = coalesce($kind, n.kind),
9126 n.code_bytes = coalesce($code_bytes, n.code_bytes)
9127 ",
9128 )
9129 .param("fqn", c.fqn.clone())
9130 .param("name", c.name.clone())
9131 .param("path", c.path.clone())
9132 .param("language", c.language.clone())
9133 .param("project_name", c.project_name.clone())
9134 .param("kind", c.kind.clone())
9135 .param(props::CODE_BYTES, c.code_bytes.clone());
9136 graph.run(q).await?;
9137 }
9138
9139 for f in &ir.functions {
9140 let q = query(
9141 "
9142 MERGE (n:Function { fqn: $fqn })
9143 ON CREATE SET n.name = $name, n.path = $path, n.language = $language,
9144 n.project_name = $project_name, n.arity = $arity,
9145 n.return_type = $return_type, n.param_count = $param_count,
9146 n.code_bytes = $code_bytes
9147 ON MATCH SET n.name = $name, n.path = $path, n.language = $language,
9148 n.project_name = $project_name, n.arity = $arity,
9149 n.return_type = $return_type, n.param_count = $param_count,
9150 n.code_bytes = coalesce($code_bytes, n.code_bytes)
9151 ",
9152 )
9153 .param("fqn", f.fqn.clone())
9154 .param("name", f.name.clone())
9155 .param("path", f.path.clone())
9156 .param("language", f.language.clone())
9157 .param("project_name", f.project_name.clone())
9158 .param("arity", f.arity.map(|a| a as i64))
9159 .param("return_type", f.return_type.clone())
9160 .param("param_count", f.param_count.map(|c| c as i64))
9161 .param(props::CODE_BYTES, f.code_bytes.clone());
9162 graph.run(q).await?;
9163 }
9164
9165 for edge in &ir.edges {
9166 let rel = edge.kind.to_rel_type().to_string();
9167 let q = match edge.from_label.as_str() {
9168 "Module" if edge.to_label == "Function" || edge.to_label == "Module" => {
9169 let (name, path) = parse_module_key(&edge.from_key).unwrap_or_default();
9170 let cypher = format!(
9171 "
9172 MERGE (a:Module {{ name: $from_name, path: $from_path }})
9173 MERGE (b:{lbl_b} {{ {key_b}: $to_key }})
9174 MERGE (a)-[:{rel}]->(b)
9175 ",
9176 lbl_b = edge.to_label,
9177 key_b = stable_key_property(&edge.to_label),
9178 rel = rel,
9179 );
9180 query(&cypher)
9181 .param("from_name", name)
9182 .param("from_path", path)
9183 .param("to_key", edge.to_key.clone())
9184 }
9185 _ => {
9186 let cypher = format!(
9187 "
9188 MERGE (a:{lbl_a} {{ {key_a}: $from_key }})
9189 MERGE (b:{lbl_b} {{ {key_b}: $to_key }})
9190 MERGE (a)-[:{rel}]->(b)
9191 ",
9192 lbl_a = edge.from_label,
9193 key_a = stable_key_property(&edge.from_label),
9194 lbl_b = edge.to_label,
9195 key_b = stable_key_property(&edge.to_label),
9196 rel = rel,
9197 );
9198 query(&cypher)
9199 .param("from_key", edge.from_key.clone())
9200 .param("to_key", edge.to_key.clone())
9201 }
9202 };
9203 graph.run(q).await?;
9204 }
9205
9206 Ok(())
9207}
9208
/// Splits a composite module key at its first `@` into `(name, path)`.
///
/// Returns `None` when `key` contains no `@`. Any further `@` stays in the
/// path part.
fn parse_module_key(key: &str) -> Option<(String, String)> {
    let (name, path) = key.split_once('@')?;
    Some((name.to_owned(), path.to_owned()))
}
9212
/// Names the property used to identify a node of the given label in `MERGE`
/// patterns.
///
/// `File` and `ApiEndpoint` are keyed by `path`; `Function`, `Class`,
/// `Property` and `Callback` by `fqn`; everything else — including unknown
/// labels — by `name`.
fn stable_key_property(label: &str) -> &'static str {
    match label {
        "File" | "ApiEndpoint" => "path",
        "Function" | "Class" | "Property" | "Callback" => "fqn",
        // "Module", "Behaviour", "ExternalApi" and any unrecognised label.
        _ => "name",
    }
}
9224
9225#[cfg(test)] mod tests {
9228 use super::*;
9229 use crate::ir::{EdgeKind, ProjectIr};
9230 use crate::scanner::ParsedFile;
9231 use crate::{parse_once, LanguageId};
9232 use std::collections::HashSet;
9233 use std::path::{Path, PathBuf};
9234 #[test]
9237 fn append_csharp_structural_ir_populates_classes_properties_and_edges() {
9238 let src = r#"
9239namespace Ns {
9240 public class C {
9241 public string Name { get; set; }
9242 }
9243}
9244"#;
9245 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9246 let mut ir = ProjectIr::empty();
9247 append_csharp_structural_ir(&mut ir, "/src/C.cs", None, &tree, src);
9248 assert!(ir.classes.iter().any(|c| c.fqn == "Ns.C"));
9249 assert!(ir.properties.iter().any(|p| p.fqn == "Ns.C.Name"));
9250 assert!(
9251 ir.edges
9252 .iter()
9253 .any(|e| e.kind == EdgeKind::DeclaresClass && e.to_key == "Ns.C")
9254 );
9255 assert!(ir
9256 .edges
9257 .iter()
9258 .any(|e| e.kind == EdgeKind::DeclaresProperty && e.to_key == "Ns.C.Name"));
9259 }
9260
9261 #[test]
9262 fn csharp_using_summary_parses_ast_and_filters_system_microsoft() {
9263 let src = r#"
9264using System;
9265using System.Collections.Generic;
9266using Microsoft.Extensions.Logging;
9267using OtherNs;
9268using static System.Math;
9269using AliasType = OtherNs.SomeType;
9270
9271namespace ConsumerNs { class C { void M() { } } }
9272"#;
9273 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9274 let sum = extract_csharp_using_summary(&tree, src);
9275 assert!(sum.namespace_imports.contains(&"OtherNs".to_string()));
9276 assert!(!sum
9277 .namespace_imports
9278 .iter()
9279 .any(|n| n.starts_with("System") || n.starts_with("Microsoft")));
9280 assert_eq!(
9281 sum.alias_map.get("AliasType").map(String::as_str),
9282 Some("OtherNs.SomeType")
9283 );
9284 }
9285
9286 #[test]
9287 fn csharp_global_using_included_in_summary() {
9288 let src = r#"
9289global using SharedNs;
9290
9291namespace N { class C { } }
9292"#;
9293 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9294 let sum = extract_csharp_using_summary(&tree, src);
9295 assert!(sum.namespace_imports.contains(&"SharedNs".to_string()));
9296 }
9297
9298 #[test]
9299 fn csharp_api_endpoints_multiline_http_get_attribute() {
9300 let src = r#"
9301namespace N {
9302 public class C {
9303 [HttpGet(
9304 "/x")]
9305 public void GetIt() { }
9306 }
9307}
9308"#;
9309 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9310 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
9311 assert_eq!(eps.len(), 1);
9312 assert_eq!(eps[0].0, vec!["GET"]);
9313 assert_eq!(eps[0].1, "/x");
9314 assert_eq!(eps[0].2, "GetIt");
9315 }
9316
9317 #[test]
9318 fn csharp_api_endpoints_class_route_plus_method_http_get() {
9319 let src = r#"
9320namespace N {
9321 [Route("api/v1")]
9322 public class OrdersController {
9323 [HttpGet("orders")]
9324 public void Get() { }
9325 }
9326}
9327"#;
9328 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9329 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
9330 assert_eq!(eps.len(), 1);
9331 assert_eq!(eps[0].0, vec!["GET"]);
9332 assert_eq!(eps[0].1, "/api/v1/orders");
9333 assert_eq!(eps[0].2, "Get");
9334 }
9335
9336 #[test]
9337 fn csharp_api_endpoints_single_line_http_get_regression() {
9338 let src = r#"
9339namespace N {
9340 public class C {
9341 [HttpGet("/api/orders")]
9342 public void List() { }
9343 }
9344}
9345"#;
9346 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9347 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
9348 assert_eq!(eps.len(), 1);
9349 assert_eq!(eps[0].1, "/api/orders");
9350 assert_eq!(eps[0].2, "List");
9351 }
9352
9353 #[test]
9354 fn csharp_api_endpoints_comment_with_fake_attribute_not_parsed() {
9355 let src = r#"
9356namespace N {
9357 public class C {
9358 // [HttpGet("/fake")]
9359 [HttpGet("/real")]
9360 public void A() { }
9361 }
9362}
9363"#;
9364 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9365 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
9366 assert_eq!(eps.len(), 1);
9367 assert_eq!(eps[0].1, "/real");
9368 }
9369
9370 #[test]
9371 fn csharp_api_endpoints_comment_only_does_not_create_endpoint() {
9372 let src = r#"
9373namespace N {
9374 public class C {
9375 // [HttpGet("/fake")]
9376 public void M() { }
9377 }
9378}
9379"#;
9380 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9381 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
9382 assert!(eps.is_empty(), "expected no endpoints, got {eps:?}");
9383 }
9384
9385 #[test]
9386 fn csharp_symbols_nested_class_fqns() {
9387 let src = r#"
9388namespace N {
9389 public class Outer {
9390 public class Inner {
9391 public void M() { }
9392 }
9393 }
9394}
9395"#;
9396 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9397 let (classes, methods, _) = extract_csharp_symbols(&tree, src);
9398 let fqns: Vec<&str> = classes.iter().map(|c| c.fqn.as_str()).collect();
9399 assert!(fqns.contains(&"N.Outer"));
9400 assert!(fqns.contains(&"N.Outer.Inner"));
9401 let m = methods.iter().find(|f| f.name == "M").expect("method M");
9402 assert_eq!(m.class_fqn.as_deref(), Some("N.Outer.Inner"));
9403 assert_eq!(m.fqn, "N.Outer.Inner.M");
9404 }
9405
9406 #[test]
9407 fn csharp_symbols_enum_kind_and_fqn() {
9408 let src = r#"
9409namespace N {
9410 public enum Color { Red, Green }
9411}
9412"#;
9413 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9414 let (classes, _, _) = extract_csharp_symbols(&tree, src);
9415 let e = classes.iter().find(|c| c.name == "Color").expect("enum Color");
9416 assert_eq!(e.fqn, "N.Color");
9417 assert_eq!(e.kind, Some("enum"));
9418 }
9419
9420 #[test]
9421 fn csharp_symbols_record_kind_and_fqn() {
9422 let src = r#"
9423namespace N {
9424 public record Person(string Name);
9425}
9426"#;
9427 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9428 let (classes, _, _) = extract_csharp_symbols(&tree, src);
9429 let r = classes.iter().find(|c| c.name == "Person").expect("record Person");
9430 assert_eq!(r.fqn, "N.Person");
9431 assert_eq!(r.kind, Some("record"));
9432 }
9433
9434 #[test]
9435 fn csharp_namespace_nested_blocks_in_fqn() {
9436 let src = r#"
9437namespace A {
9438 namespace B {
9439 public class C {
9440 public void M() {}
9441 }
9442 }
9443}
9444"#;
9445 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9446 let (classes, methods, _) = extract_csharp_symbols(&tree, src);
9447 let c = classes.iter().find(|x| x.name == "C").expect("class C");
9448 assert_eq!(c.fqn, "A.B.C");
9449 let m = methods.iter().find(|f| f.name == "M").expect("method M");
9450 assert_eq!(m.fqn, "A.B.C.M");
9451 }
9452
9453 #[test]
9454 fn csharp_namespace_file_scoped_extracts() {
9455 let src = "namespace Ns;\npublic class X { }\n";
9456 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9457 assert_eq!(
9458 extract_csharp_namespace(&tree, src).as_deref(),
9459 Some("Ns")
9460 );
9461 let (classes, _, _) = extract_csharp_symbols(&tree, src);
9462 let x = classes.iter().find(|c| c.name == "X").expect("class X");
9463 assert_eq!(x.fqn, "Ns.X");
9464 }
9465
9466 #[test]
9467 fn csharp_collect_file_namespace_strings_two_roots() {
9468 let src = r#"
9469namespace A { public class Ca { } }
9470namespace B { public class Cb { } }
9471"#;
9472 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9473 let mut ns = collect_csharp_file_namespace_strings(&tree, src);
9474 ns.sort();
9475 assert_eq!(ns, vec!["A".to_string(), "B".to_string()]);
9476 }
9477
9478 #[test]
9479 fn csharp_class_inheritance_edges_resolved() {
9480 let src_base = "namespace N { public class Base { } }";
9481 let src_der = "namespace N { public class Derived : Base { } }";
9482 let tree_b = parse_once(LanguageId::CSharp, src_base).expect("parse");
9483 let tree_d = parse_once(LanguageId::CSharp, src_der).expect("parse");
9484 let files = vec![
9485 ParsedFile {
9486 path: PathBuf::from("/b/Base.cs"),
9487 language: LanguageId::CSharp,
9488 tree: tree_b,
9489 source: src_base.to_string(),
9490 is_test: false,
9491 },
9492 ParsedFile {
9493 path: PathBuf::from("/d/Derived.cs"),
9494 language: LanguageId::CSharp,
9495 tree: tree_d,
9496 source: src_der.to_string(),
9497 is_test: false,
9498 },
9499 ];
9500 let index = build_csharp_batch_index(&files, Path::new("."));
9501 let using = extract_csharp_using_summary(&files[1].tree, &files[1].source);
9502 let edges = extract_csharp_class_inheritance_edges(
9503 &files[1].tree,
9504 &files[1].source,
9505 Some("N"),
9506 &using,
9507 &index,
9508 );
9509 assert!(
9510 edges.contains(&(String::from("N.Derived"), String::from("N.Base"))),
9511 "edges={edges:?}"
9512 );
9513 }
9514
9515 #[test]
9516 fn csharp_symbols_constructor_function_matches_call_graph_fqn() {
9517 let src = r#"
9518namespace Ns {
9519 public class C {
9520 public C() { M(); }
9521 public void M() { }
9522 }
9523}
9524"#;
9525 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9526 let files = vec![ParsedFile {
9527 path: PathBuf::from("/t/C.cs"),
9528 language: LanguageId::CSharp,
9529 tree,
9530 source: src.to_string(),
9531 is_test: false,
9532 }];
9533 let index = build_csharp_batch_index(&files, Path::new("."));
9534 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9535 let (_, methods, _) = extract_csharp_symbols(&files[0].tree, &files[0].source);
9536 let ctor = methods
9537 .iter()
9538 .find(|f| f.fqn == "Ns.C.ctor#0")
9539 .expect("ctor#0 symbol");
9540 assert_eq!(ctor.name, "ctor#0");
9541 assert_eq!(ctor.class_fqn.as_deref(), Some("Ns.C"));
9542 let calls = extract_csharp_calls(
9543 &files[0].tree,
9544 &files[0].source,
9545 Some("Ns"),
9546 &using_summary,
9547 &index,
9548 );
9549 assert!(
9550 calls.iter().any(|(a, b)| a == "Ns.C.ctor#0" && b == "Ns.C.M"),
9551 "expected persisted ctor fqn as caller, got {calls:?}"
9552 );
9553 }
9554
9555 #[test]
9556 fn csharp_symbols_property_declares_accessors_and_property_symbol() {
9557 let src = r#"
9558namespace Ns {
9559 public class C {
9560 public string Name { get; set; }
9561 }
9562}
9563"#;
9564 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9565 let (_, methods, properties) = extract_csharp_symbols(&tree, src);
9566 let pname = properties.iter().find(|p| p.name == "Name").expect("property Name");
9567 assert_eq!(pname.fqn, "Ns.C.Name");
9568 assert_eq!(pname.class_fqn, "Ns.C");
9569 assert!(pname.declared_type.as_deref() == Some("string"));
9570 let get_f = methods
9571 .iter()
9572 .find(|f| f.fqn == "Ns.C.get_Name")
9573 .expect("getter function");
9574 assert_eq!(get_f.name, "get_Name");
9575 let set_f = methods
9576 .iter()
9577 .find(|f| f.fqn == "Ns.C.set_Name")
9578 .expect("setter function");
9579 assert_eq!(set_f.name, "set_Name");
9580 }
9581
9582 #[test]
9583 fn csharp_symbols_method_modifiers_params_return_type() {
9584 let src = r#"
9585namespace N {
9586 public class Api {
9587 public static async System.Threading.Tasks.Task<int> Foo(string s, OrderDto o) { return 0; }
9588 }
9589}
9590"#;
9591 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9592 let (_, methods, _) = extract_csharp_symbols(&tree, src);
9593 let f = methods.iter().find(|m| m.name == "Foo").expect("Foo");
9594 assert!(f.modifiers.contains(&"public".to_string()));
9595 assert!(f.modifiers.contains(&"static".to_string()));
9596 assert!(f.modifiers.contains(&"async".to_string()));
9597 assert_eq!(f.param_count, 2);
9598 assert_eq!(f.param_types, vec!["string", "OrderDto"]);
9599 assert!(f.return_type.as_deref().unwrap_or("").contains("Task<int>"));
9600 }
9601
9602 #[test]
9603 fn csharp_method_body_spans_nested_method_fqn() {
9604 let src = r#"
9605namespace N {
9606 public class Outer {
9607 public class Inner {
9608 public void M() { int x = 1; }
9609 }
9610 }
9611}
9612"#;
9613 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9614 let spans = csharp_method_body_spans(&tree, src, Some("N"));
9615 let m = spans.iter().find(|(fqn, _, _)| fqn.ends_with(".M")).expect("span M");
9616 assert_eq!(m.0, "N.Outer.Inner.M");
9617 }
9618
9619 #[test]
9620 fn csharp_uses_class_resolves_type_via_using_and_batch_index() {
9621 let src_other = "namespace OtherNs { public class RemoteDto { } }\n";
9622 let src_consumer = r#"
9623using OtherNs;
9624namespace ConsumerNs {
9625 public class Consumer {
9626 public void M() {
9627 RemoteDto x;
9628 }
9629 }
9630}
9631"#;
9632 let tree_other = parse_once(LanguageId::CSharp, src_other).expect("parse");
9633 let tree_consumer = parse_once(LanguageId::CSharp, src_consumer).expect("parse");
9634 let files = vec![
9635 ParsedFile {
9636 path: PathBuf::from("/repo/OtherNs/RemoteDto.cs"),
9637 language: LanguageId::CSharp,
9638 tree: tree_other,
9639 source: src_other.to_string(),
9640 is_test: false,
9641 },
9642 ParsedFile {
9643 path: PathBuf::from("/repo/Consumer.cs"),
9644 language: LanguageId::CSharp,
9645 tree: tree_consumer,
9646 source: src_consumer.to_string(),
9647 is_test: false,
9648 },
9649 ];
9650 let index = build_csharp_batch_index(&files, Path::new("."));
9651 let using_summary = extract_csharp_using_summary(&files[1].tree, &files[1].source);
9652 let uses = extract_csharp_used_classes(
9653 &files[1].tree,
9654 &files[1].source,
9655 Some("ConsumerNs"),
9656 &using_summary,
9657 &index,
9658 );
9659 assert!(
9660 uses.iter().any(|(_, cls)| cls == "OtherNs.RemoteDto"),
9661 "expected USES_CLASS to OtherNs.RemoteDto, got {uses:?}"
9662 );
9663 }
9664
9665 #[test]
9666 fn csharp_type_alias_using_resolves_to_aliased_class_fqn() {
9667 let src_other = "namespace OtherNs { public class RemoteDto { } }\n";
9668 let src_consumer = r#"
9669using R = OtherNs.RemoteDto;
9670namespace ConsumerNs {
9671 public class Consumer {
9672 public void M() {
9673 R x;
9674 }
9675 }
9676}
9677"#;
9678 let tree_other = parse_once(LanguageId::CSharp, src_other).expect("parse");
9679 let tree_consumer = parse_once(LanguageId::CSharp, src_consumer).expect("parse");
9680 let files = vec![
9681 ParsedFile {
9682 path: PathBuf::from("/p/Other.cs"),
9683 language: LanguageId::CSharp,
9684 tree: tree_other,
9685 source: src_other.to_string(),
9686 is_test: false,
9687 },
9688 ParsedFile {
9689 path: PathBuf::from("/p/Consumer.cs"),
9690 language: LanguageId::CSharp,
9691 tree: tree_consumer,
9692 source: src_consumer.to_string(),
9693 is_test: false,
9694 },
9695 ];
9696 let index = build_csharp_batch_index(&files, Path::new("."));
9697 let using_summary = extract_csharp_using_summary(&files[1].tree, &files[1].source);
9698 let uses = extract_csharp_used_classes(
9699 &files[1].tree,
9700 &files[1].source,
9701 Some("ConsumerNs"),
9702 &using_summary,
9703 &index,
9704 );
9705 assert!(uses.iter().any(|(_, cls)| cls == "OtherNs.RemoteDto"));
9706 }
9707
9708 #[test]
9709 fn csharp_uses_class_does_not_use_method_name_as_type_from_invocation() {
9710 let src = r#"
9711namespace Ns {
9712 public class C {
9713 public void GetOrder() { }
9714 public void M() {
9715 var x = GetOrder();
9716 }
9717 }
9718}
9719"#;
9720 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9721 let files = vec![ParsedFile {
9722 path: PathBuf::from("/t/C.cs"),
9723 language: LanguageId::CSharp,
9724 tree,
9725 source: src.to_string(),
9726 is_test: false,
9727 }];
9728 let index = build_csharp_batch_index(&files, Path::new("."));
9729 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9730 let uses = extract_csharp_used_classes(
9731 &files[0].tree,
9732 &files[0].source,
9733 Some("Ns"),
9734 &using_summary,
9735 &index,
9736 );
9737 assert!(
9738 !uses.iter().any(|(_, cls)| cls == "Ns.GetOrder"),
9739 "GetOrder() invocation must not create USES_CLASS to a synthetic type, got {uses:?}"
9740 );
9741 }
9742
9743 #[test]
9744 fn csharp_uses_class_skips_bcl_datetime_in_parameter() {
9745 let src = r#"
9746namespace Ns {
9747 public class C {
9748 public void M(System.DateTime d) { }
9749 }
9750}
9751"#;
9752 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9753 let files = vec![ParsedFile {
9754 path: PathBuf::from("/t/C.cs"),
9755 language: LanguageId::CSharp,
9756 tree,
9757 source: src.to_string(),
9758 is_test: false,
9759 }];
9760 let index = build_csharp_batch_index(&files, Path::new("."));
9761 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9762 let uses = extract_csharp_used_classes(
9763 &files[0].tree,
9764 &files[0].source,
9765 Some("Ns"),
9766 &using_summary,
9767 &index,
9768 );
9769 assert!(
9770 !uses.iter().any(|(_, cls)| cls.contains("DateTime")),
9771 "DateTime parameter should be filtered as BCL noise, got {uses:?}"
9772 );
9773 }
9774
9775 #[test]
9776 fn csharp_uses_class_does_not_include_types_only_used_in_local_function() {
9777 let src = r#"
9778namespace Ns {
9779 public class Outer {
9780 public void M() {
9781 void Local() {
9782 InnerOnly x;
9783 }
9784 }
9785 }
9786 public class InnerOnly { }
9787}
9788"#;
9789 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9790 let files = vec![ParsedFile {
9791 path: PathBuf::from("/t/Types.cs"),
9792 language: LanguageId::CSharp,
9793 tree,
9794 source: src.to_string(),
9795 is_test: false,
9796 }];
9797 let index = build_csharp_batch_index(&files, Path::new("."));
9798 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9799 let uses = extract_csharp_used_classes(
9800 &files[0].tree,
9801 &files[0].source,
9802 Some("Ns"),
9803 &using_summary,
9804 &index,
9805 );
9806 assert!(
9807 !uses
9808 .iter()
9809 .any(|(caller, cls)| caller == "Ns.Outer.M" && cls.contains("InnerOnly")),
9810 "InnerOnly only appears inside local function; outer M must not USES_CLASS it, got {uses:?}"
9811 );
9812 }
9813
9814 #[test]
9815 fn csharp_calls_resolve_field_receiver_to_type_fqn() {
9816 let src_repo =
9817 "namespace OtherNs { public class OrderRepo { public void Get(int id) { } } }\n";
9818 let src_svc = r#"
9819using OtherNs;
9820namespace ConsumerNs {
9821 public class Svc {
9822 private OrderRepo _repo;
9823 public void M() { _repo.Get(1); }
9824 }
9825}
9826"#;
9827 let tree_repo = parse_once(LanguageId::CSharp, src_repo).expect("parse");
9828 let tree_svc = parse_once(LanguageId::CSharp, src_svc).expect("parse");
9829 let files = vec![
9830 ParsedFile {
9831 path: PathBuf::from("/r/OrderRepo.cs"),
9832 language: LanguageId::CSharp,
9833 tree: tree_repo,
9834 source: src_repo.to_string(),
9835 is_test: false,
9836 },
9837 ParsedFile {
9838 path: PathBuf::from("/r/Svc.cs"),
9839 language: LanguageId::CSharp,
9840 tree: tree_svc,
9841 source: src_svc.to_string(),
9842 is_test: false,
9843 },
9844 ];
9845 let index = build_csharp_batch_index(&files, Path::new("."));
9846 let using_summary = extract_csharp_using_summary(&files[1].tree, &files[1].source);
9847 let calls = extract_csharp_calls(
9848 &files[1].tree,
9849 &files[1].source,
9850 Some("ConsumerNs"),
9851 &using_summary,
9852 &index,
9853 );
9854 assert!(
9855 calls.iter().any(|(_, c)| c == "OtherNs.OrderRepo.Get"),
9856 "expected callee OtherNs.OrderRepo.Get, got {calls:?}"
9857 );
9858 }
9859
9860 #[test]
9861 fn csharp_calls_this_receiver_resolves_to_class_method() {
9862 let src = r#"
9863namespace Ns {
9864 public class C {
9865 void Helper() { }
9866 public void Run() { this.Helper(); }
9867 }
9868}
9869"#;
9870 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9871 let files = vec![ParsedFile {
9872 path: PathBuf::from("/t/C.cs"),
9873 language: LanguageId::CSharp,
9874 tree,
9875 source: src.to_string(),
9876 is_test: false,
9877 }];
9878 let index = build_csharp_batch_index(&files, Path::new("."));
9879 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9880 let calls = extract_csharp_calls(
9881 &files[0].tree,
9882 &files[0].source,
9883 Some("Ns"),
9884 &using_summary,
9885 &index,
9886 );
9887 assert!(
9888 calls.iter().any(|(_, c)| c == "Ns.C.Helper"),
9889 "expected callee Ns.C.Helper, got {calls:?}"
9890 );
9891 }
9892
9893 #[test]
9894 fn csharp_calls_constructor_invocations_extracted() {
9895 let src = r#"
9896namespace Ns {
9897 public class C {
9898 public C() { M(); }
9899 public void M() { }
9900 }
9901}
9902"#;
9903 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9904 let files = vec![ParsedFile {
9905 path: PathBuf::from("/t/C.cs"),
9906 language: LanguageId::CSharp,
9907 tree,
9908 source: src.to_string(),
9909 is_test: false,
9910 }];
9911 let index = build_csharp_batch_index(&files, Path::new("."));
9912 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9913 let calls = extract_csharp_calls(
9914 &files[0].tree,
9915 &files[0].source,
9916 Some("Ns"),
9917 &using_summary,
9918 &index,
9919 );
9920 assert!(
9921 calls.iter().any(|(a, b)| a == "Ns.C.ctor#0" && b == "Ns.C.M"),
9922 "expected ctor caller Ns.C.ctor#0 -> Ns.C.M, got {calls:?}"
9923 );
9924 }
9925
9926 #[test]
9927 fn csharp_calls_property_getter_invocations_extracted() {
9928 let src = r#"
9929namespace Ns {
9930 public class C {
9931 public int Prop {
9932 get { Helper(); return 1; }
9933 }
9934 void Helper() { }
9935 }
9936}
9937"#;
9938 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9939 let files = vec![ParsedFile {
9940 path: PathBuf::from("/t/C.cs"),
9941 language: LanguageId::CSharp,
9942 tree,
9943 source: src.to_string(),
9944 is_test: false,
9945 }];
9946 let index = build_csharp_batch_index(&files, Path::new("."));
9947 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9948 let calls = extract_csharp_calls(
9949 &files[0].tree,
9950 &files[0].source,
9951 Some("Ns"),
9952 &using_summary,
9953 &index,
9954 );
9955 assert!(
9956 calls.iter().any(|(a, b)| a == "Ns.C.get_Prop" && b == "Ns.C.Helper"),
9957 "expected getter caller Ns.C.get_Prop -> Ns.C.Helper, got {calls:?}"
9958 );
9959 }
9960
9961 #[test]
9962 fn csharp_calls_console_writeline_uses_well_known_type() {
9963 let src = r#"
9964namespace Ns {
9965 public class C {
9966 public void M() { Console.WriteLine("x"); }
9967 }
9968}
9969"#;
9970 let tree = parse_once(LanguageId::CSharp, src).expect("parse");
9971 let files = vec![ParsedFile {
9972 path: PathBuf::from("/t/C.cs"),
9973 language: LanguageId::CSharp,
9974 tree,
9975 source: src.to_string(),
9976 is_test: false,
9977 }];
9978 let index = build_csharp_batch_index(&files, Path::new("."));
9979 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9980 let calls = extract_csharp_calls(
9981 &files[0].tree,
9982 &files[0].source,
9983 Some("Ns"),
9984 &using_summary,
9985 &index,
9986 );
9987 assert!(
9988 calls.iter().any(|(_, c)| c == "System.Console.WriteLine"),
9989 "expected System.Console.WriteLine, got {calls:?}"
9990 );
9991 }
9992
9993 #[test]
9994 fn csharp_external_http_urls_ignore_comments_not_string_literals() {
9995 let src = r#"
9996namespace Ns {
9997 class C {
9998 // https://evil-line.example/x
9999 void M() {
10000 /* https://evil-block.example/y */
10001 var x = "https://good.example/only";
10002 }
10003 }
10004}
10005"#;
10006 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
10007 let urls = extract_csharp_external_http_urls_with_spans(&tree, src);
10008 assert_eq!(urls.len(), 1, "expected one URL from string literal, got {urls:?}");
10009 assert!(
10010 urls[0].0.contains("good.example"),
10011 "unexpected url {:?}",
10012 urls[0].0
10013 );
10014 assert!(
10015 !urls.iter().any(|(u, _, _)| u.contains("evil")),
10016 "comment URLs must not appear: {urls:?}"
10017 );
10018 }
10019
10020 #[test]
10021 fn csharp_external_api_links_url_only_to_methods_whose_body_contains_literal() {
10022 let src = r#"
10023namespace Ns {
10024 public class C {
10025 void A() { var x = "https://api-a.example/v1"; }
10026 void B() { var y = "https://api-b.example/v2"; }
10027 }
10028}
10029"#;
10030 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
10031 let (_, methods, _) = extract_csharp_symbols(&tree, src);
10032 let url_spans = extract_csharp_external_http_urls_with_spans(&tree, src);
10033 let method_spans = csharp_method_body_spans(&tree, src, Some("Ns"));
10034 let mut spans_by_fqn: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
10035 for (fqn, lo, hi) in method_spans {
10036 spans_by_fqn.entry(fqn).or_default().push((lo, hi));
10037 }
10038 let mut pairs: Vec<(String, String)> = Vec::new();
10039 for (full_url, u_start, u_end) in &url_spans {
10040 for func in &methods {
10041 let Some(ranges) = spans_by_fqn.get(&func.fqn) else {
10042 continue;
10043 };
10044 if !ranges
10045 .iter()
10046 .any(|(lo, hi)| *lo <= *u_start && *u_end <= *hi)
10047 {
10048 continue;
10049 }
10050 pairs.push((func.fqn.clone(), full_url.clone()));
10051 }
10052 }
10053 pairs.sort();
10054 assert!(
10055 pairs.contains(&(String::from("Ns.C.A"), String::from("https://api-a.example/v1"))),
10056 "missing A->api-a, got {pairs:?}"
10057 );
10058 assert!(
10059 pairs.contains(&(String::from("Ns.C.B"), String::from("https://api-b.example/v2"))),
10060 "missing B->api-b, got {pairs:?}"
10061 );
10062 assert!(
10063 !pairs.contains(&(String::from("Ns.C.A"), String::from("https://api-b.example/v2"))),
10064 "N×M leak: A linked to B's URL: {pairs:?}"
10065 );
10066 assert!(
10067 !pairs.contains(&(String::from("Ns.C.B"), String::from("https://api-a.example/v1"))),
10068 "N×M leak: B linked to A's URL: {pairs:?}"
10069 );
10070 }
10071
10072 #[test]
10073 fn resolves_module_name_from_erl_attribute() {
10074 let source = "-module(real_mod).\nfoo() -> ok.\n";
10075 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10076 let resolved = resolve_erlang_module_name(Path::new("/tmp/not_matching.erl"), &tree, source);
10077 assert_eq!(resolved.as_deref(), Some("real_mod"));
10078 }
10079 #[test]
10082 fn resolves_module_name_from_erl_basename_fallback() {
10083 let source = "foo() -> ok.\n";
10084 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10085 let resolved = resolve_erlang_module_name(Path::new("/tmp/fallback_name.erl"), &tree, source);
10086 assert_eq!(resolved.as_deref(), Some("fallback_name"));
10087 }
10088 #[test]
10090 fn does_not_fallback_module_name_for_hrl() {
10091 let source = "-define(FLAG, true).\n";
10092 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10093 let resolved = resolve_erlang_module_name(Path::new("/tmp/records.hrl"), &tree, source);
10094 assert_eq!(resolved, None);
10095 }
10096
10097 #[test]
10098 fn extracts_erlang_behaviour_usages_from_ast() {
10099 let source = r#"
10100 -behaviour(gen_server).
10101 -behavior(custom_behaviour).
10102 foo() -> ok.
10103 "#;
10104 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10105 let meta = extract_erlang_behaviour_metadata_from_tree(&tree, source);
10106 assert!(meta.behaviour_usages.contains("gen_server"));
10107 assert!(meta.behaviour_usages.contains("custom_behaviour"));
10108 }
10109
10110 #[test]
10111 fn extracts_declared_and_optional_callbacks_from_ast() {
10112 let source = r#"
10113 -callback init(term()) -> {ok, state()}.
10114 -callback handle_call(term(), term(), term()) -> {reply, ok, term()}.
10115 -optional_callbacks([handle_call/3]).
10116 "#;
10117 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10118 let meta = extract_erlang_behaviour_metadata_from_tree(&tree, source);
10119
10120 assert!(meta.declared_callbacks.contains(&(String::from("init"), 1)));
10121 assert!(
10122 meta.declared_callbacks
10123 .contains(&(String::from("handle_call"), 3))
10124 );
10125 assert!(
10126 meta.optional_callbacks
10127 .contains(&(String::from("handle_call"), 3))
10128 );
10129 assert!(!meta.optional_callbacks.contains(&(String::from("init"), 1)));
10130 }
10131
10132 #[test]
10133 fn extracts_behaviour_extension_and_override_hints_from_ast() {
10134 let source = r#"
10135 -extends_behaviour(base_handler).
10136 -override_callback(handle_call/3).
10137 "#;
10138 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10139 let meta = extract_erlang_behaviour_metadata_from_tree(&tree, source);
10140 assert!(meta.behaviour_extensions.contains("base_handler"));
10141 assert!(
10142 meta.overridden_callbacks
10143 .contains(&(String::from("handle_call"), 3))
10144 );
10145 }
10146
10147 #[test]
10148 fn ast_extracts_multiline_and_quoted_attributes() {
10149 let source = r#"
10150 -'behaviour'('gen_server').
10151 -callback
10152 'handle_call'(
10153 term(),
10154 term(),
10155 term()
10156 ) ->
10157 {reply, ok, term()}.
10158 -optional_callbacks([
10159 'handle_call'/3
10160 ]).
10161 "#;
10162 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10163 let meta = extract_erlang_behaviour_metadata_from_tree(&tree, source);
10164 assert!(meta.behaviour_usages.contains("gen_server"));
10165 assert!(
10166 meta.declared_callbacks
10167 .contains(&(String::from("handle_call"), 3))
10168 );
10169 assert!(
10170 meta.optional_callbacks
10171 .contains(&(String::from("handle_call"), 3))
10172 );
10173 }
10174
10175 #[test]
10176 fn ast_skips_macro_or_variable_names_for_safety() {
10177 let source = r#"
10178 -behaviour(?DYN_BEHAVIOUR).
10179 -optional_callbacks([?CALLBACK/2]).
10180 -extends_behaviour(ParentVar).
10181 -override_callback(?OVERRIDE/3).
10182 "#;
10183 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10184 let meta = extract_erlang_behaviour_metadata_from_tree(&tree, source);
10185 assert!(meta.behaviour_usages.is_empty());
10186 assert!(meta.optional_callbacks.is_empty());
10187 assert!(meta.behaviour_extensions.is_empty());
10188 assert!(meta.overridden_callbacks.is_empty());
10189 }
10190
10191 #[test]
10192 fn maps_functions_to_callback_contracts() {
10193 let source = r#"
10194 -behaviour(gen_server).
10195 -callback local_cb(term()) -> ok.
10196 -optional_callbacks([local_cb/1]).
10197 init(Args) -> {ok, Args}.
10198 handle_call(_Req, _From, State) -> {reply, ok, State}.
10199 local_cb(X) -> X.
10200 "#;
10201 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10202 let meta = extract_erlang_behaviour_metadata_from_tree(&tree, source);
10203 let contracts = collect_callback_contracts_for_module(
10204 Some("my_behaviour"),
10205 &meta.behaviour_usages,
10206 &meta.declared_callbacks,
10207 &meta.optional_callbacks,
10208 );
10209
10210 assert!(contracts.iter().any(|c| {
10212 c.behaviour == "gen_server" && c.name == "handle_call" && c.arity == 3
10213 }));
10214 assert!(contracts.iter().any(|c| {
10216 c.behaviour == "my_behaviour"
10217 && c.name == "local_cb"
10218 && c.arity == 1
10219 && c.optional
10220 }));
10221 }
10222
10223 #[test]
10224 fn extracts_erlang_functions_from_ast_multiline_and_quoted() {
10225 let source = r#"
10226 -module(my_handler).
10227 'special_name'(
10228 Req,
10229 State
10230 ) ->
10231 {ok, State}.
10232 websocket_handle(Frame, State) ->
10233 {ok, State}.
10234 "#;
10235 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10236 let functions = extract_erlang_functions("my_handler", &tree, source);
10237 let sigs: HashSet<(String, u32)> = functions
10238 .iter()
10239 .map(|(name, arity, _)| (name.clone(), *arity))
10240 .collect();
10241
10242 assert!(sigs.contains(&(String::from("special_name"), 2)));
10243 assert!(sigs.contains(&(String::from("websocket_handle"), 2)));
10244 }
10245
10246 #[test]
10247 fn erlang_function_spans_keyed_by_fqn() {
10248 let source = r#"
10249-module(m).
10250handle(Req, State) ->
10251 {ok, State}.
10252"#;
10253 let tree = parse_once(LanguageId::Erlang, source).expect("parse");
10254 let spans = extract_erlang_function_spans("m", &tree, source);
10255 let fqn = "m:handle/2";
10256 let (lo, hi) = spans.get(fqn).copied().expect("span for handle/2");
10257 assert!(source[lo..hi].contains("handle(Req, State)"));
10258 }
10259
10260 #[test]
10261 fn java_method_body_spans_include_method_body() {
10262 let source = r#"
10263package com.example;
10264class A {
10265 void m() { return; }
10266}
10267"#;
10268 let tree = parse_once(LanguageId::Java, source).expect("parse");
10269 let spans = extract_java_method_body_spans(&tree, source, Some("com.example"));
10270 let (lo, hi) = spans.get("com.example.A.m").copied().expect("span");
10271 assert!(source[lo..hi].contains("return"));
10272 }
10273
10274 #[test]
10275 fn selects_endpoint_handler_callbacks_from_contracts_and_implemented_signatures() {
10276 let mut idx = HashMap::new();
10277 idx.insert(
10278 String::from("omega_ws_handler"),
10279 ErlangModuleSnapshot {
10280 implemented_signatures: HashSet::from([
10281 (String::from("init"), 2),
10282 (String::from("websocket_handle"), 2),
10283 (String::from("not_a_callback"), 1),
10284 ]),
10285 callback_signatures: HashSet::from([
10286 (String::from("init"), 2),
10287 (String::from("websocket_handle"), 2),
10288 (String::from("websocket_info"), 2),
10289 ]),
10290 },
10291 );
10292
10293 let fqns = select_endpoint_handler_fqns("omega_ws_handler", &idx);
10294 assert_eq!(
10295 fqns,
10296 vec![
10297 String::from("omega_ws_handler:init/2"),
10298 String::from("omega_ws_handler:websocket_handle/2"),
10299 ]
10300 );
10301 }
10302
10303 #[test]
10304 fn endpoint_handler_callback_selection_is_strict_when_metadata_missing() {
10305 let mut idx = HashMap::new();
10306 idx.insert(
10307 String::from("router_only"),
10308 ErlangModuleSnapshot {
10309 implemented_signatures: HashSet::from([(String::from("init"), 2)]),
10310 callback_signatures: HashSet::new(),
10311 },
10312 );
10313
10314 let none_for_unknown = select_endpoint_handler_fqns("missing_module", &idx);
10315 assert!(none_for_unknown.is_empty());
10316
10317 let none_for_no_contracts = select_endpoint_handler_fqns("router_only", &idx);
10318 assert!(none_for_no_contracts.is_empty());
10319 }
10320
10321 #[test]
10322 fn extracts_precise_erlang_call_edges_from_ast() {
10323 let source = r#"
10324 -module(my_mod).
10325 a() -> b(), c(), ok.
10326 b() -> ok.
10327 c() -> lists:map(fun(X) -> X end, [1,2]).
10328 "#;
10329 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10330 let functions = extract_erlang_functions("my_mod", &tree, source);
10331 let function_by_sig: HashMap<(String, u32), String> = functions
10332 .iter()
10333 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
10334 .collect();
10335 let edges = extract_erlang_call_edges(&tree, source, Some("my_mod"), &function_by_sig);
10336
10337 assert!(edges.contains(&(String::from("my_mod:a/0"), String::from("my_mod:b/0"))));
10338 assert!(edges.contains(&(String::from("my_mod:a/0"), String::from("my_mod:c/0"))));
10339 assert!(!edges.contains(&(String::from("my_mod:c/0"), String::from("my_mod:map/2"))));
10341 }
10342
10343 #[test]
10344 fn does_not_create_nm_edges_for_sparse_local_calls() {
10345 let source = r#"
10346 -module(my_mod).
10347 a() -> b().
10348 b() -> ok.
10349 c() -> ok.
10350 d() -> ok.
10351 "#;
10352 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10353 let functions = extract_erlang_functions("my_mod", &tree, source);
10354 let function_by_sig: HashMap<(String, u32), String> = functions
10355 .iter()
10356 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
10357 .collect();
10358 let edges = extract_erlang_call_edges(&tree, source, Some("my_mod"), &function_by_sig);
10359
10360 let expected = HashSet::from([(String::from("my_mod:a/0"), String::from("my_mod:b/0"))]);
10361 assert_eq!(edges, expected);
10362 }
10363
10364 #[test]
10365 fn attributes_calls_to_enclosing_multi_clause_function() {
10366 let source = r#"
10367 -module(my_mod).
10368 foo(0) -> bar();
10369 foo(N) -> baz(N).
10370 bar() -> ok.
10371 baz(_N) -> ok.
10372 "#;
10373 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10374 let functions = extract_erlang_functions("my_mod", &tree, source);
10375 let function_by_sig: HashMap<(String, u32), String> = functions
10376 .iter()
10377 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
10378 .collect();
10379 let edges = extract_erlang_call_edges(&tree, source, Some("my_mod"), &function_by_sig);
10380
10381 assert!(edges.contains(&(String::from("my_mod:foo/1"), String::from("my_mod:bar/0"))));
10382 assert!(edges.contains(&(String::from("my_mod:foo/1"), String::from("my_mod:baz/1"))));
10383 assert_eq!(edges.len(), 2);
10384 }
10385
10386 #[test]
10387 fn extracts_called_modules_from_ast_remote_calls() {
10388 let source = r#"
10389 -module(my_mod).
10390 a() -> lists:map(fun(X) -> X end, [1,2]), my_dep:run().
10391 "#;
10392 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10393 let mods = extract_erlang_called_modules_from_tree(&tree, source);
10394 assert!(mods.contains("lists"));
10395 assert!(mods.contains("my_dep"));
10396 }
10397
10398 #[test]
10399 fn extracts_cowboy_endpoints_from_ast_multiline_tuples() {
10400 let source = r#"
10401 Dispatch = cowboy_router:compile([
10402 {'_', [
10403 {"/v1/ping", ping_handler, []},
10404 {
10405 "/v1/ws",
10406 websocket_handler,
10407 []
10408 }
10409 ]}
10410 ]).
10411 "#;
10412 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10413 let endpoints = extract_erlang_api_endpoints(&tree, source);
10414 let keyset: HashSet<(String, String)> = endpoints
10415 .into_iter()
10416 .map(|(_, path, handler)| (path, handler))
10417 .collect();
10418 assert!(keyset.contains(&(String::from("/v1/ping"), String::from("ping_handler"))));
10419 assert!(keyset.contains(&(String::from("/v1/ws"), String::from("websocket_handler"))));
10420 }
10421
10422 #[test]
10423 fn extracts_external_urls_from_ast_strings_only() {
10424 let source = r#"
10425 -module(my_mod).
10426 a() ->
10427 Url = "https://api.example.com/v1/orders?x=1",
10428 io:format("~p", [Url]).
10429 % "https://comment.only/should/not/appear"
10430 "#;
10431 let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
10432 let urls = extract_external_http_urls_from_tree(&tree, source);
10433 assert!(urls.contains(&String::from("https://api.example.com/v1/orders?x=1")));
10434 assert!(!urls.iter().any(|u| u.contains("comment.only")));
10435 }
10436
10437 #[test]
10440 fn java_parse_warnings_surface_error_nodes() {
10441 let src = r#"
10442package p;
10443class Broken { void m( // missing close paren and brace
10444"#;
10445 let tree = parse_once(LanguageId::Java, src).expect("parse");
10446 let w = extract_java_parse_warnings(&tree, src);
10447 assert!(
10448 !w.is_empty(),
10449 "expected ERROR/missing warnings, got {w:?}"
10450 );
10451 assert!(w.iter().any(|(line, _, _)| *line >= 2), "got {w:?}");
10452 }
10453
10454 #[test]
10455 fn java_method_extracts_return_and_param_types() {
10456 let src = r#"
10457package com.example;
10458import java.util.List;
10459import org.springframework.http.ResponseEntity;
10460class Order {}
10461class C {
10462 public ResponseEntity<List<Order>> getOrders(String id, int page) { return null; }
10463}
10464"#;
10465 let tree = parse_once(LanguageId::Java, src).expect("parse");
10466 let (_, methods) = extract_java_symbols(&tree, src, Some("com.example"));
10467 let m = methods
10468 .iter()
10469 .find(|f| f.name == "getOrders")
10470 .expect("getOrders");
10471 assert_eq!(m.return_type.as_deref(), Some("ResponseEntity"));
10472 assert_eq!(m.param_types, vec!["String", "int"]);
10473 assert_eq!(m.param_count, 2);
10474 }
10475
10476 #[test]
10477 fn java_inheritance_edges_extends_and_implements() {
10478 let src = r#"
10479package p;
10480class Parent {}
10481class Child extends Parent implements java.io.Serializable {}
10482"#;
10483 let tree = parse_once(LanguageId::Java, src).expect("parse");
10484 let edges = extract_java_inheritance_edges(&tree, src, Some("p"));
10485 assert!(edges.contains(&(String::from("p.Child"), String::from("p.Parent"))));
10486 assert!(edges.contains(&(
10487 String::from("p.Child"),
10488 String::from("java.io.Serializable")
10489 )));
10490 }
10491
10492 #[test]
10493 fn java_class_and_method_annotations_extracted() {
10494 let src = r#"
10495package p;
10496@Service
10497class Svc {
10498 @Override
10499 @Deprecated
10500 void run() {}
10501}
10502"#;
10503 let tree = parse_once(LanguageId::Java, src).expect("parse");
10504 let c = extract_java_class_annotations(&tree, src, Some("p"));
10505 assert!(c.iter().any(|(fqn, a)| fqn == "p.Svc" && a.contains(&String::from("Service"))));
10506 let m = extract_java_method_annotations(&tree, src, Some("p"));
10507 let (_, anns) = m.iter().find(|(f, _)| f.ends_with(".run")).expect("run");
10508 assert!(anns.contains(&String::from("Override")));
10509 assert!(anns.contains(&String::from("Deprecated")));
10510 }
10511
10512 #[test]
10513 fn java_injected_dependencies_constructor_and_autowired_field() {
10514 let src = r#"
10515package p;
10516class OrderRepo {}
10517class UserService {}
10518class MyService {
10519 @Autowired
10520 OrderRepo repo;
10521 public MyService(UserService svc, OrderRepo r2) {}
10522}
10523"#;
10524 let tree = parse_once(LanguageId::Java, src).expect("parse");
10525 let deps = extract_java_injected_dependencies(&tree, src, Some("p"));
10526 assert!(deps.contains(&(String::from("p.MyService"), String::from("p.OrderRepo"))));
10527 assert!(deps.contains(&(String::from("p.MyService"), String::from("p.UserService"))));
10528 }
10529
10530 #[test]
10531 fn go_parse_warnings_surface_error_nodes() {
10532 let src = r#"package main
10533func main() { x :=
10534"#;
10535 let tree = parse_once(LanguageId::Go, src).expect("parse");
10536 let w = extract_go_parse_warnings(&tree, src);
10537 assert!(!w.is_empty(), "expected warnings, got {w:?}");
10538 }
10539
10540 #[test]
10541 fn go_extracts_interface_kind_and_struct_embedding() {
10542 let src = r#"package main
10543import "io"
10544type Reader interface { Read(p []byte) (n int, err error) }
10545type MyStruct struct {
10546 io.Reader
10547 Name string
10548}
10549"#;
10550 let tree = parse_once(LanguageId::Go, src).expect("parse");
10551 let (classes, _) = extract_go_symbols(&tree, src, Some("main"));
10552 assert!(classes.iter().any(|c| c.name == "Reader" && c.kind == Some("interface")));
10553 assert!(classes.iter().any(|c| c.name == "MyStruct" && c.kind == Some("struct")));
10554 let emb = extract_go_embedding(&tree, src, Some("main"));
10555 assert!(emb.contains(&(String::from("main.MyStruct"), String::from("io.Reader"))));
10556 }
10557
10558 #[test]
10559 fn go_goroutine_call_extracted_as_calls_function_pair() {
10560 let src = r#"package main
10561func worker() {}
10562func main() { go worker() }
10563"#;
10564 let tree = parse_once(LanguageId::Go, src).expect("parse");
10565 let g = extract_go_goroutine_calls(&tree, src, Some("main"));
10566 assert!(
10567 g.contains(&(String::from("main.main"), String::from("main.worker"))),
10568 "got {g:?}"
10569 );
10570 }
10571
10572 #[test]
10573 fn go_method_pointer_receiver_flag() {
10574 let src = r#"package main
10575type User struct{}
10576func (u *User) GetName() string { return "" }
10577func (u User) String() string { return "" }
10578"#;
10579 let tree = parse_once(LanguageId::Go, src).expect("parse");
10580 let (_, funcs) = extract_go_symbols(&tree, src, Some("main"));
10581 let get = funcs.iter().find(|f| f.name == "GetName").expect("GetName");
10582 assert_eq!(get.is_pointer_receiver, Some(true));
10583 let s = funcs.iter().find(|f| f.name == "String").expect("String");
10584 assert_eq!(s.is_pointer_receiver, Some(false));
10585 }
10586
10587 #[test]
10588 fn go_import_paths_extracted_from_grouped_import() {
10589 let src = r#"package main
10590import (
10591 "fmt"
10592 "github.com/gorilla/mux"
10593 "myproject/internal/handler"
10594)
10595func main() {}
10596"#;
10597 let tree = parse_once(LanguageId::Go, src).expect("parse");
10598 let imps = extract_go_imports(&tree, src);
10599 assert!(imps.contains(&String::from("fmt")));
10600 assert!(imps.contains(&String::from("github.com/gorilla/mux")));
10601 assert!(imps.contains(&String::from("myproject/internal/handler")));
10602 }
10603
10604 #[test]
10605 fn go_import_resolves_to_known_scanned_file_path() {
10606 let mut known = HashSet::new();
10607 known.insert(String::from("/repo/myproject/internal/handler/api.go"));
10608 let dep =
10609 resolve_go_import_to_known_go_file("myproject/internal/handler", &known, &[], &[], None);
10610 assert_eq!(dep.as_deref(), Some("/repo/myproject/internal/handler/api.go"));
10611 }
10612
10613 #[test]
10614 fn rust_graph_symbols_use_file_scoped_fqn() {
10615 let src = "fn hello() {}";
10616 let tree = parse_once(LanguageId::Rust, src).expect("parse");
10617 let file = ParsedFile {
10618 path: PathBuf::from("/x/a.rs"),
10619 language: LanguageId::Rust,
10620 tree,
10621 source: src.to_string(),
10622 is_test: false,
10623 };
10624 let syms = extract_non_java_function_symbols(&file, src, "/x/a.rs");
10625 assert_eq!(syms.len(), 1);
10626 assert_eq!(syms[0].fqn, "/x/a.rs::hello");
10627 }
10628
10629 #[test]
10630 fn rust_ir_emits_intrafile_calls() {
10631 let src = r#"
10632fn callee() {}
10633fn caller() {
10634 callee();
10635}
10636"#;
10637 let tree = parse_once(LanguageId::Rust, src).expect("parse");
10638 let files = vec![ParsedFile {
10639 path: PathBuf::from("src/a.rs"),
10640 language: LanguageId::Rust,
10641 tree,
10642 source: src.to_string(),
10643 is_test: false,
10644 }];
10645 let ir = build_project_ir(Path::new("/repo"), &files, &ExtractOptions::default());
10646 let has_call = ir.edges.iter().any(|e| {
10647 e.kind == EdgeKind::CallsFunction
10648 && e.from_key == "src/a.rs::caller"
10649 && e.to_key == "src/a.rs::callee"
10650 });
10651 assert!(has_call, "expected intra-file CALLS_FUNCTION");
10652 }
10653
10654 #[test]
10655 fn rust_ir_emits_depends_on_file_from_use() {
10656 let caller_src = "use crate::other;\nfn caller() {}";
10657 let callee_src = "pub fn callee() {}";
10658 let caller_tree = parse_once(LanguageId::Rust, caller_src).expect("parse");
10659 let callee_tree = parse_once(LanguageId::Rust, callee_src).expect("parse");
10660 let files = vec![
10661 ParsedFile {
10662 path: PathBuf::from("src/caller.rs"),
10663 language: LanguageId::Rust,
10664 tree: caller_tree,
10665 source: caller_src.to_string(),
10666 is_test: false,
10667 },
10668 ParsedFile {
10669 path: PathBuf::from("src/other.rs"),
10670 language: LanguageId::Rust,
10671 tree: callee_tree,
10672 source: callee_src.to_string(),
10673 is_test: false,
10674 },
10675 ];
10676 let ir = build_project_ir(Path::new("/repo"), &files, &ExtractOptions::default());
10677 let has_dep = ir.edges.iter().any(|e| {
10678 e.kind == EdgeKind::DependsOnFile
10679 && e.from_key == "src/caller.rs"
10680 && e.to_key == "src/other.rs"
10681 });
10682 assert!(has_dep, "expected DEPENDS_ON_FILE from use crate::other");
10683 }
10684
10685 #[test]
10686 fn go_ir_emits_calls_function_edges() {
10687 let src = r#"
10688package main
10689
10690import "fmt"
10691
10692func helper() { fmt.Println("x") }
10693
10694func main() { helper() }
10695"#;
10696 let tree = parse_once(LanguageId::Go, src).expect("parse");
10697 let files = vec![ParsedFile {
10698 path: PathBuf::from("main.go"),
10699 language: LanguageId::Go,
10700 tree,
10701 source: src.to_string(),
10702 is_test: false,
10703 }];
10704 let ir = build_project_ir(Path::new("/repo"), &files, &ExtractOptions::default());
10705 let has_call = ir.edges.iter().any(|e| e.kind == EdgeKind::CallsFunction);
10706 assert!(has_call, "expected at least one CALLS_FUNCTION edge for Go");
10707 }
10708
10709 #[test]
10710 fn python_graph_symbols_file_scoped_fqn_and_nested() {
10711 let src = r#"
10712def top():
10713 pass
10714def outer():
10715 def inner():
10716 pass
10717 pass
10718"#;
10719 let tree = parse_once(LanguageId::Python, src).expect("parse");
10720 let file = ParsedFile {
10721 path: PathBuf::from("/app/mod.py"),
10722 language: LanguageId::Python,
10723 tree,
10724 source: src.to_string(),
10725 is_test: false,
10726 };
10727 let syms = extract_non_java_function_symbols(&file, src, "/app/mod.py");
10728 let fqns: Vec<&str> = syms.iter().map(|s| s.fqn.as_str()).collect();
10729 assert!(fqns.contains(&"/app/mod.py::top"));
10730 assert!(fqns.contains(&"/app/mod.py::outer"));
10731 assert!(fqns.contains(&"/app/mod.py::outer.inner"));
10732 }
10733
10734 #[test]
10735 fn python_class_methods_excluded_from_graph_symbols() {
10736 let src = r#"
10737class C:
10738 def meth(self):
10739 pass
10740def global_fn():
10741 pass
10742"#;
10743 let tree = parse_once(LanguageId::Python, src).expect("parse");
10744 let file = ParsedFile {
10745 path: PathBuf::from("/app/c.py"),
10746 language: LanguageId::Python,
10747 tree,
10748 source: src.to_string(),
10749 is_test: false,
10750 };
10751 let syms = extract_non_java_function_symbols(&file, src, "/app/c.py");
10752 assert!(!syms.iter().any(|s| s.name == "meth"));
10753 assert!(syms.iter().any(|s| s.name == "global_fn"));
10754 }
10755
10756 #[test]
10757 fn js_graph_symbols_class_method_arrow_and_top_level() {
10758 let src = r#"
10759class Box {
10760 run() { return 1; }
10761 go = () => 2;
10762}
10763const top = () => {};
10764function decl() {}
10765"#;
10766 let tree = parse_once(LanguageId::JavaScript, src).expect("parse");
10767 let file = ParsedFile {
10768 path: PathBuf::from("/app/box.js"),
10769 language: LanguageId::JavaScript,
10770 tree,
10771 source: src.to_string(),
10772 is_test: false,
10773 };
10774 let syms = extract_non_java_function_symbols(&file, src, "/app/box.js");
10775 let fqns: Vec<&str> = syms.iter().map(|s| s.fqn.as_str()).collect();
10776 assert!(
10777 fqns.iter().any(|f| f.ends_with("::Box.run")),
10778 "got {fqns:?}"
10779 );
10780 assert!(
10781 fqns.iter().any(|f| f.ends_with("::Box.go")),
10782 "got {fqns:?}"
10783 );
10784 assert!(fqns.iter().any(|f| f.ends_with("::top")), "got {fqns:?}");
10785 assert!(fqns.iter().any(|f| f.ends_with("::decl")), "got {fqns:?}");
10786 }
10787
10788 #[test]
10789 fn ts_graph_symbols_include_class_method() {
10790 let src = r#"
10791class Svc {
10792 handle(): void {}
10793}
10794"#;
10795 let tree = parse_once(LanguageId::TypeScript, src).expect("parse");
10796 let file = ParsedFile {
10797 path: PathBuf::from("/svc/h.ts"),
10798 language: LanguageId::TypeScript,
10799 tree,
10800 source: src.to_string(),
10801 is_test: false,
10802 };
10803 let syms = extract_non_java_function_symbols(&file, src, "/svc/h.ts");
10804 assert!(
10805 syms.iter().any(|s| s.fqn.ends_with("::Svc.handle")),
10806 "got {:?}",
10807 syms.iter().map(|s| &s.fqn).collect::<Vec<_>>()
10808 );
10809 }
10810
10811 #[test]
10812 fn python_parse_warnings_surface_error_nodes() {
10813 let src = "def foo(\n";
10814 let tree = parse_once(LanguageId::Python, src).expect("parse");
10815 let w = extract_python_parse_warnings(&tree, src);
10816 assert!(!w.is_empty(), "expected warnings, got {w:?}");
10817 }
10818
10819 #[test]
10820 fn js_parse_warnings_surface_error_nodes() {
10821 let src = "function f( {";
10822 let tree = parse_once(LanguageId::JavaScript, src).expect("parse");
10823 let w = extract_js_ts_parse_warnings(&tree, src);
10824 assert!(!w.is_empty(), "expected warnings, got {w:?}");
10825 }
10826
10827 #[test]
10828 fn python_import_resolves_to_known_py_file() {
10829 let mut known = HashSet::new();
10830 known.insert("/repo/pkg/helper.py".to_string());
10831 let dep = resolve_python_import_to_known_file("pkg.helper", &known);
10832 assert_eq!(dep.as_deref(), Some("/repo/pkg/helper.py"));
10833 }
10834
10835 #[test]
10836 fn python_intrafile_call_edge() {
10837 let src = r#"
10838def callee():
10839 pass
10840def caller():
10841 callee()
10842"#;
10843 let tree = parse_once(LanguageId::Python, src).expect("parse");
10844 let fp = "/t/a.py";
10845 let mut name_to_fqn = HashMap::new();
10846 name_to_fqn.insert("callee".into(), format!("{fp}::callee"));
10847 name_to_fqn.insert("caller".into(), format!("{fp}::caller"));
10848 let calls = extract_python_intrafile_calls(&tree, src, fp, &name_to_fqn);
10849 assert!(
10850 calls.contains(&(format!("{fp}::caller"), format!("{fp}::callee"))),
10851 "got {calls:?}"
10852 );
10853 }
10854
10855 #[test]
10856 fn js_ts_relative_import_resolves_to_known_file() {
10857 let mut known = HashSet::new();
10858 known.insert("/repo/src/util.ts".to_string());
10859 let dep = resolve_js_ts_import_to_known_file("./util", "/repo/src/main.ts", &known);
10860 assert_eq!(dep.as_deref(), Some("/repo/src/util.ts"));
10861 }
10862
10863 #[test]
10864 fn js_intrafile_call_edge() {
10865 let src = r#"
10866function callee() {}
10867function caller() { callee(); }
10868"#;
10869 let tree = parse_once(LanguageId::JavaScript, src).expect("parse");
10870 let fp = "/t/b.js";
10871 let mut name_to_fqn = HashMap::new();
10872 name_to_fqn.insert("callee".into(), format!("{fp}::callee"));
10873 name_to_fqn.insert("caller".into(), format!("{fp}::caller"));
10874 let calls = extract_js_ts_intrafile_calls(
10875 &tree,
10876 src,
10877 fp,
10878 LanguageId::JavaScript,
10879 &name_to_fqn,
10880 );
10881 assert!(
10882 calls.contains(&(format!("{fp}::caller"), format!("{fp}::callee"))),
10883 "got {calls:?}"
10884 );
10885 }
10886}
10887