1use std::collections::{HashMap, HashSet};
7
8use chrono::Utc;
9use codemem_core::{Edge, GraphNode, MemoryNode, MemoryType, NodeKind, RelationshipType};
10
11use codemem_core::ScipConfig;
12
13use super::{
14 is_import_ref, is_read_ref, is_write_ref, ScipDefinition, ScipReadResult, ROLE_IMPORT,
15 ROLE_READ_ACCESS, ROLE_WRITE_ACCESS,
16};
17
/// Output of [`build_graph`]: the graph fragments and bookkeeping counters
/// derived from one SCIP index.
#[derive(Debug, Clone, Default)]
pub struct ScipBuildResult {
    /// Graph nodes produced for symbols, synthetic containers, external
    /// packages and dangling edge endpoints.
    pub nodes: Vec<GraphNode>,
    /// Edges between the produced nodes (containment, relationships, references).
    pub edges: Vec<Edge>,
    /// Documentation memories, each paired with the id of the symbol node
    /// it was generated for.
    pub memories: Vec<(MemoryNode, String)>,
    /// Number of external (`pkg:` / `ext:`) nodes that were created.
    pub ext_nodes_created: usize,
    /// File paths copied from the SCIP result's `covered_files`.
    pub files_covered: HashSet<String>,
    /// Number of documentation memories pushed into `memories`.
    pub doc_memories_created: usize,
}
28
/// Builds graph nodes, edges and documentation memories from a parsed SCIP index.
///
/// The function runs in phases:
/// 1. Filter definitions down to in-repo source symbols and drop noise symbols.
/// 2. Emit one `sym:` node per remaining definition. Fields, properties, type
///    parameters and enum variants that have a parent in their containment
///    chain are folded into the parent's payload instead of getting a node.
///    Containment edges, optional doc memories and relationship edges are
///    emitted alongside each node.
/// 3. Optionally emit `pkg:` nodes for external packages.
/// 4. Convert source references into `Imports` / `Writes` / `Reads` /
///    `DependsOn` / `Calls` edges, subject to per-kind fan-out limits.
/// 5. Deduplicate edges by id, optionally collapse intra-class edges into
///    parent payload metadata, synthesize nodes for dangling edge endpoints,
///    and drop edges that target stdlib packages or unknown node ids.
///
/// # Arguments
/// * `scip` - the SCIP read result providing definitions, references,
///   externals and covered files.
/// * `namespace` - optional namespace copied onto every node and memory.
/// * `config` - toggles and limits controlling containment, doc memories,
///   external nodes, fan-out and intra-class collapsing.
///
/// # Returns
/// A [`ScipBuildResult`] holding the nodes, edges, memories and counters.
pub fn build_graph(
    scip: &ScipReadResult,
    namespace: Option<&str>,
    config: &ScipConfig,
) -> ScipBuildResult {
    // A single timestamp is shared by every edge and memory created in this run.
    let now = Utc::now();
    let ns = namespace.map(|s| s.to_string());

    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let mut memories: Vec<(MemoryNode, String)> = Vec::new();
    let mut ext_nodes_created = 0;
    let mut doc_memories_created = 0;

    // Phase 1a: keep only definitions in source paths that are not wildcard modules.
    let path_filtered: Vec<&ScipDefinition> = scip
        .definitions
        .iter()
        .filter(|d| is_source_path(&d.file_path) && !is_wildcard_module(&d.qualified_name))
        .collect();

    // Phase 1b: parse each SCIP symbol once and drop noise symbols.
    // `source_defs` and `parsed_symbols` are kept index-aligned.
    let mut source_defs: Vec<&ScipDefinition> = Vec::with_capacity(path_filtered.len());
    let mut parsed_symbols: Vec<scip::types::Symbol> = Vec::with_capacity(path_filtered.len());
    for def in &path_filtered {
        let parsed = match scip::symbol::parse_symbol(&def.scip_symbol) {
            Ok(p) => p,
            Err(_) => {
                // Unparseable symbols are kept (the noise filter is skipped);
                // a default symbol is pushed to preserve index alignment.
                source_defs.push(def);
                parsed_symbols.push(scip::types::Symbol::default());
                continue;
            }
        };
        if is_noise_symbol(def, &parsed) {
            continue;
        }
        source_defs.push(def);
        parsed_symbols.push(parsed);
    }

    // Lookup from raw SCIP symbol string to the definition's qualified name.
    let mut symbol_to_qname: HashMap<&str, &str> = HashMap::new();
    for def in &source_defs {
        symbol_to_qname.insert(&def.scip_symbol, &def.qualified_name);
    }

    // Dedup sets for nodes/containment edges, plus the folding bookkeeping:
    // `folded_to_parent` maps a folded qualified name to its parent's node id,
    // `folded_children` maps a parent qualified name to (leaf name, category).
    let mut created_node_ids: HashSet<String> = HashSet::new();
    let mut created_edge_ids: HashSet<String> = HashSet::new();
    let mut folded_to_parent: HashMap<String, String> = HashMap::new();
    let mut folded_children: HashMap<String, Vec<(String, &'static str)>> = HashMap::new();

    // Containment chain (qualified name, kind) per definition, index-aligned
    // with `source_defs`.
    let def_chains: Vec<Vec<(String, NodeKind)>> = parsed_symbols
        .iter()
        .map(extract_containment_chain_from_parsed)
        .collect();

    // Phase 2: one pass over definitions emitting nodes, containment edges,
    // doc memories and relationship edges.
    for (def_idx, def) in source_defs.iter().enumerate() {
        // Test definitions override their declared kind.
        let kind = if def.is_test {
            NodeKind::Test
        } else {
            def.kind
        };

        // Kinds that are folded into the parent's payload rather than
        // becoming standalone nodes.
        let tier3_category = match kind {
            NodeKind::Field | NodeKind::Property => Some("fields"),
            NodeKind::TypeParameter => Some("type_params"),
            NodeKind::EnumVariant => Some("variants"),
            _ => None,
        };

        if let Some(category) = tier3_category {
            let chain = &def_chains[def_idx];
            // Folding needs a parent segment; otherwise fall through and
            // create a regular node.
            if chain.len() >= 2 {
                let parent_qname = &chain[chain.len() - 2].0;
                // Leaf name is the text after the last ':' or '.'.
                let leaf_name = def
                    .qualified_name
                    .rsplit([':', '.'])
                    .next()
                    .unwrap_or(&def.qualified_name);
                folded_children
                    .entry(parent_qname.clone())
                    .or_default()
                    .push((leaf_name.to_string(), category));
                folded_to_parent.insert(def.qualified_name.clone(), format!("sym:{parent_qname}"));
                symbol_to_qname.insert(&def.scip_symbol, &def.qualified_name);
                continue;
            }
        }

        let node_id = format!("sym:{}", def.qualified_name);

        // Node payload: location, flags and provenance.
        let mut payload = HashMap::new();
        payload.insert(
            "scip_symbol".to_string(),
            serde_json::Value::String(def.scip_symbol.clone()),
        );
        payload.insert("line_start".to_string(), serde_json::json!(def.line_start));
        payload.insert("line_end".to_string(), serde_json::json!(def.line_end));
        payload.insert(
            "file_path".to_string(),
            serde_json::Value::String(def.file_path.clone()),
        );
        if def.is_test {
            payload.insert("is_test".to_string(), serde_json::json!(true));
        }
        if def.is_generated {
            payload.insert("is_generated".to_string(), serde_json::json!(true));
        }
        // The first documentation entry is stored as the type signature.
        if let Some(type_sig) = def.documentation.first() {
            payload.insert(
                "type_signature".to_string(),
                serde_json::Value::String(type_sig.clone()),
            );
        }
        payload.insert(
            "source".to_string(),
            serde_json::Value::String("scip".to_string()),
        );

        created_node_ids.insert(node_id.clone());
        nodes.push(GraphNode {
            id: node_id.clone(),
            kind,
            label: def.qualified_name.clone(),
            payload,
            centrality: 0.0,
            memory_id: None,
            namespace: ns.clone(),
            valid_from: None,
            valid_to: None,
        });

        if config.hierarchical_containment {
            // Hierarchical mode: file -> outer segment -> ... -> symbol.
            let chain = &def_chains[def_idx];
            let file_node_id = format!("file:{}", def.file_path);

            if chain.len() <= 1 {
                // No intermediate containers: attach the symbol directly to its file.
                let edge_id = format!("contains:{file_node_id}->{node_id}");
                if created_edge_ids.insert(edge_id.clone()) {
                    edges.push(Edge {
                        id: edge_id,
                        src: file_node_id,
                        dst: node_id.clone(),
                        relationship: RelationshipType::Contains,
                        weight: 0.1,
                        properties: scip_edge_properties(),
                        created_at: now,
                        valid_from: Some(now),
                        valid_to: None,
                    });
                }
            } else {
                for (i, (seg_qname, seg_kind)) in chain.iter().enumerate() {
                    let seg_node_id = format!("sym:{seg_qname}");

                    // Create a synthetic node for any chain segment that is
                    // not this definition and has no node yet.
                    if seg_qname != &def.qualified_name
                        && created_node_ids.insert(seg_node_id.clone())
                    {
                        let mut syn_payload = HashMap::new();
                        syn_payload.insert(
                            "source".to_string(),
                            serde_json::Value::String("scip-synthetic".to_string()),
                        );
                        syn_payload.insert(
                            "file_path".to_string(),
                            serde_json::Value::String(def.file_path.clone()),
                        );
                        nodes.push(GraphNode {
                            id: seg_node_id.clone(),
                            kind: *seg_kind,
                            label: seg_qname.clone(),
                            payload: syn_payload,
                            centrality: 0.0,
                            memory_id: None,
                            namespace: ns.clone(),
                            valid_from: None,
                            valid_to: None,
                        });
                    }

                    // The first segment hangs off the file; later segments
                    // hang off the previous segment.
                    let parent_id = if i == 0 {
                        file_node_id.clone()
                    } else {
                        format!("sym:{}", chain[i - 1].0)
                    };

                    let edge_id = format!("contains:{parent_id}->{seg_node_id}");
                    if created_edge_ids.insert(edge_id.clone()) {
                        edges.push(Edge {
                            id: edge_id,
                            src: parent_id,
                            dst: seg_node_id,
                            relationship: RelationshipType::Contains,
                            weight: 0.1,
                            properties: scip_edge_properties(),
                            created_at: now,
                            valid_from: Some(now),
                            valid_to: None,
                        });
                    }
                }
            }
        } else {
            // Flat mode: every symbol is contained directly by its file.
            let file_node_id = format!("file:{}", def.file_path);
            edges.push(Edge {
                id: format!("contains:{file_node_id}->{node_id}"),
                src: file_node_id,
                dst: node_id.clone(),
                relationship: RelationshipType::Contains,
                weight: 0.1,
                properties: scip_edge_properties(),
                created_at: now,
                valid_from: Some(now),
                valid_to: None,
            });
        }

        // Optionally store the joined documentation as a memory linked to this node.
        if config.store_docs_as_memories && !def.documentation.is_empty() {
            let doc_text = def.documentation.join("\n");
            let mem_id = format!("scip-doc:{}", def.qualified_name);
            let memory = MemoryNode {
                id: mem_id,
                content: doc_text,
                memory_type: MemoryType::Context,
                importance: 0.4,
                confidence: 1.0,
                access_count: 0,
                content_hash: String::new(),
                tags: vec!["scip-doc".to_string(), "auto-generated".to_string()],
                metadata: HashMap::new(),
                namespace: ns.clone(),
                session_id: None,
                repo: None,
                git_ref: None,
                expires_at: None,
                created_at: now,
                updated_at: now,
                last_accessed_at: now,
            };
            memories.push((memory, node_id.clone()));
            doc_memories_created += 1;
        }

        // Relationship edges declared on the definition itself.
        for rel in &def.relationships {
            if rel.target_symbol.is_empty() {
                continue;
            }
            // Resolve the target to a known symbol node, else to an external
            // package id; skip the relationship when neither is possible.
            let target_node_id =
                if let Some(qname) = symbol_to_qname.get(rel.target_symbol.as_str()) {
                    format!("sym:{qname}")
                } else {
                    match parse_external_node_id(&rel.target_symbol) {
                        Some(ext_id) => ext_id,
                        None => continue,
                    }
                };

            if rel.is_implementation {
                edges.push(Edge {
                    id: format!("implements:{node_id}->{target_node_id}"),
                    src: node_id.clone(),
                    dst: target_node_id.clone(),
                    relationship: RelationshipType::Implements,
                    weight: 0.8,
                    properties: scip_edge_properties(),
                    created_at: now,
                    valid_from: Some(now),
                    valid_to: None,
                });
                // A method implementation additionally gets an Overrides edge.
                if def.kind == NodeKind::Method {
                    edges.push(Edge {
                        id: format!("overrides:{node_id}->{target_node_id}"),
                        src: node_id.clone(),
                        dst: target_node_id.clone(),
                        relationship: RelationshipType::Overrides,
                        weight: 0.8,
                        properties: scip_edge_properties(),
                        created_at: now,
                        valid_from: Some(now),
                        valid_to: None,
                    });
                }
            }
            if rel.is_type_definition {
                edges.push(Edge {
                    id: format!("typedef:{node_id}->{target_node_id}"),
                    src: node_id.clone(),
                    dst: target_node_id.clone(),
                    relationship: RelationshipType::TypeDefinition,
                    weight: 0.6,
                    properties: scip_edge_properties(),
                    created_at: now,
                    valid_from: Some(now),
                    valid_to: None,
                });
            }
            // A reference relationship that is not an implementation is
            // recorded as inheritance.
            if rel.is_reference && !rel.is_implementation {
                edges.push(Edge {
                    id: format!("inherits:{node_id}->{target_node_id}"),
                    src: node_id.clone(),
                    dst: target_node_id,
                    relationship: RelationshipType::Inherits,
                    weight: 0.8,
                    properties: scip_edge_properties(),
                    created_at: now,
                    valid_from: Some(now),
                    valid_to: None,
                });
            }
        }
    }

    // Attach folded children to their parent node's payload, grouped by
    // category. Parents are matched by node label.
    for node in &mut nodes {
        let qname = node.label.as_str();
        if let Some(children) = folded_children.get(qname) {
            let mut fields = Vec::new();
            let mut type_params = Vec::new();
            let mut variants = Vec::new();
            for (name, category) in children {
                match *category {
                    "fields" => fields.push(serde_json::Value::String(name.clone())),
                    "type_params" => type_params.push(serde_json::Value::String(name.clone())),
                    "variants" => variants.push(serde_json::Value::String(name.clone())),
                    _ => {}
                }
            }
            if !fields.is_empty() {
                node.payload
                    .insert("fields".to_string(), serde_json::Value::Array(fields));
            }
            if !type_params.is_empty() {
                node.payload.insert(
                    "type_params".to_string(),
                    serde_json::Value::Array(type_params),
                );
            }
            if !variants.is_empty() {
                node.payload
                    .insert("variants".to_string(), serde_json::Value::Array(variants));
            }
        }
    }

    // Phase 3: one `pkg:` node per distinct (manager, name) external package.
    if config.create_external_nodes {
        let mut pkg_nodes_created: HashSet<String> = HashSet::new();
        for ext in &scip.externals {
            if ext.package_manager.is_empty() || ext.package_name.is_empty() {
                continue;
            }
            let node_id = format!("pkg:{}:{}", ext.package_manager, ext.package_name);
            if !pkg_nodes_created.insert(node_id.clone()) {
                continue;
            }

            let mut payload = HashMap::new();
            payload.insert(
                "package_manager".to_string(),
                serde_json::Value::String(ext.package_manager.clone()),
            );
            payload.insert(
                "package_name".to_string(),
                serde_json::Value::String(ext.package_name.clone()),
            );
            payload.insert(
                "package_version".to_string(),
                serde_json::Value::String(ext.package_version.clone()),
            );
            payload.insert(
                "source".to_string(),
                serde_json::Value::String("scip".to_string()),
            );

            nodes.push(GraphNode {
                id: node_id,
                kind: NodeKind::External,
                label: ext.package_name.clone(),
                payload,
                centrality: 0.0,
                memory_id: None,
                namespace: ns.clone(),
                valid_from: None,
                valid_to: None,
            });
            ext_nodes_created += 1;
        }
    }

    // Phase 4 setup: index non-folded definitions by file so references can
    // be attributed to their enclosing definition.
    let mut defs_by_file: HashMap<&str, Vec<&ScipDefinition>> = HashMap::new();
    for def in &source_defs {
        if folded_to_parent.contains_key(&def.qualified_name) {
            continue;
        }
        defs_by_file
            .entry(def.file_path.as_str())
            .or_default()
            .push(def);
    }

    let source_refs: Vec<&super::ScipReference> = scip
        .references
        .iter()
        .filter(|r| is_source_path(&r.file_path))
        .collect();

    // Number of references per (symbol, file) pair, used for fan-out limiting.
    let mut ref_counts: HashMap<(&str, &str), usize> = HashMap::new();
    for r in &source_refs {
        *ref_counts
            .entry((&r.scip_symbol, &r.file_path))
            .or_insert(0) += 1;
    }

    let symbol_to_kind: HashMap<&str, NodeKind> = source_defs
        .iter()
        .map(|d| (d.scip_symbol.as_str(), d.kind))
        .collect();

    for r in &source_refs {
        let count = ref_counts
            .get(&(r.scip_symbol.as_str(), r.file_path.as_str()))
            .copied()
            .unwrap_or(0);
        let target_kind = symbol_to_kind.get(r.scip_symbol.as_str()).copied();
        // The limit depends on the target's kind; unknown kinds use the
        // generic per-symbol maximum.
        let limit = match target_kind {
            Some(NodeKind::Module) => config.fan_out_limits.module,
            Some(NodeKind::Function) => config.fan_out_limits.function,
            Some(NodeKind::Method) => config.fan_out_limits.method,
            Some(NodeKind::Class | NodeKind::Trait | NodeKind::Interface) => {
                config.fan_out_limits.class
            }
            _ => config.max_references_per_symbol,
        };
        // All references of an over-referenced (symbol, file) pair are skipped.
        if count > limit {
            continue;
        }

        if crate::index::blocklist::is_blocked_call_scip(&r.scip_symbol) {
            continue;
        }

        // Resolve the target: known symbol, else external package, else skip.
        let mut target_node_id = if let Some(qname) = symbol_to_qname.get(r.scip_symbol.as_str()) {
            format!("sym:{qname}")
        } else {
            match parse_external_node_id(&r.scip_symbol) {
                Some(ext_id) => ext_id,
                None => continue,
            }
        };

        // References to folded symbols are redirected to the parent node.
        if let Some(qname) = symbol_to_qname.get(r.scip_symbol.as_str()) {
            if let Some(parent_id) = folded_to_parent.get(*qname) {
                target_node_id = parent_id.clone();
            }
        }

        // The source is the tightest enclosing definition, or the file itself
        // when no definition encloses the reference line.
        let mut source_node_id = find_enclosing_def_indexed(&defs_by_file, &r.file_path, r.line)
            .map(|def| format!("sym:{}", def.qualified_name))
            .unwrap_or_else(|| format!("file:{}", r.file_path));

        if let Some(parent_id) = source_node_id
            .strip_prefix("sym:")
            .and_then(|qn| folded_to_parent.get(qn))
        {
            source_node_id = parent_id.clone();
        }

        // Skip self-references.
        if source_node_id == target_node_id {
            continue;
        }

        // True when, among the import/write/read bits, only READ_ACCESS is set.
        // NOTE(review): the name suggests this pattern is what scip-go emits for
        // every reference, so it is not treated as a real read — confirm.
        let semantic_mask = ROLE_IMPORT | ROLE_WRITE_ACCESS | ROLE_READ_ACCESS;
        let is_scip_go_generic = r.role_bitmask & semantic_mask == ROLE_READ_ACCESS;

        // Role bits take precedence; otherwise the target kind picks the
        // relationship.
        let (rel, weight) = if is_import_ref(r.role_bitmask) {
            (RelationshipType::Imports, 0.5)
        } else if is_write_ref(r.role_bitmask) {
            (RelationshipType::Writes, 0.4)
        } else if is_read_ref(r.role_bitmask) && !is_scip_go_generic {
            (RelationshipType::Reads, 0.3)
        } else {
            match target_kind {
                Some(NodeKind::Class | NodeKind::Interface | NodeKind::Trait | NodeKind::Type) => {
                    (RelationshipType::DependsOn, 0.3)
                }
                Some(NodeKind::Module | NodeKind::Package) => (RelationshipType::Imports, 0.5),
                Some(NodeKind::Constant) => (RelationshipType::Reads, 0.3),
                _ => (RelationshipType::Calls, 1.0),
            }
        };

        // The edge id embeds file and line, so each reference site is distinct.
        let edge_prefix = rel.to_string().to_lowercase();
        edges.push(Edge {
            id: format!(
                "{edge_prefix}:{source_node_id}->{target_node_id}:{}:{}",
                r.file_path, r.line
            ),
            src: source_node_id.clone(),
            dst: target_node_id.clone(),
            relationship: rel,
            weight,
            properties: scip_edge_properties(),
            created_at: now,
            valid_from: Some(now),
            valid_to: None,
        });

        // Non-import references to type-like targets additionally get a
        // `depends:` edge with weight 0.7.
        if !is_import_ref(r.role_bitmask) {
            let is_type_target = matches!(
                target_kind,
                Some(
                    NodeKind::Class
                        | NodeKind::Trait
                        | NodeKind::Interface
                        | NodeKind::Type
                        | NodeKind::Enum
                )
            );
            if is_type_target {
                edges.push(Edge {
                    id: format!(
                        "depends:{source_node_id}->{target_node_id}:{}:{}",
                        r.file_path, r.line
                    ),
                    src: source_node_id,
                    dst: target_node_id,
                    relationship: RelationshipType::DependsOn,
                    weight: 0.7,
                    properties: scip_edge_properties(),
                    created_at: now,
                    valid_from: Some(now),
                    valid_to: None,
                });
            }
        }
    }

    // Phase 5a: keep only the first edge for each edge id.
    let mut seen_edge_ids = HashSet::new();
    edges.retain(|e| seen_edge_ids.insert(e.id.clone()));

    // Phase 5b: collapse Calls/Reads/Writes edges between siblings of the
    // same class-like parent into an `intra_class_calls` payload entry.
    if config.collapse_intra_class_edges && config.hierarchical_containment {
        // Child -> parent map derived from symbol-to-symbol Contains edges.
        let mut child_to_parent: HashMap<&str, &str> = HashMap::new();
        for edge in &edges {
            if edge.relationship == RelationshipType::Contains
                && edge.src.starts_with("sym:")
                && edge.dst.starts_with("sym:")
            {
                child_to_parent.insert(&edge.dst, &edge.src);
            }
        }

        let node_kind_map: HashMap<&str, NodeKind> =
            nodes.iter().map(|n| (n.id.as_str(), n.kind)).collect();

        // Parent node id -> list of (source leaf, target leaf) pairs, plus
        // the ids of the edges to remove.
        let mut intra_class_counts: HashMap<String, Vec<(String, String)>> = HashMap::new();
        let mut intra_edge_ids: HashSet<String> = HashSet::new();
        for edge in &edges {
            if !matches!(
                edge.relationship,
                RelationshipType::Calls | RelationshipType::Reads | RelationshipType::Writes
            ) {
                continue;
            }
            let src_parent = child_to_parent.get(edge.src.as_str());
            let dst_parent = child_to_parent.get(edge.dst.as_str());
            if let (Some(sp), Some(dp)) = (src_parent, dst_parent) {
                let parent_kind = node_kind_map.get(sp).copied();
                let is_class_like = matches!(
                    parent_kind,
                    Some(NodeKind::Class | NodeKind::Trait | NodeKind::Interface | NodeKind::Enum)
                );
                if sp == dp && is_class_like {
                    let src_leaf = edge.src.rsplit([':', '.']).next().unwrap_or(&edge.src);
                    let dst_leaf = edge.dst.rsplit([':', '.']).next().unwrap_or(&edge.dst);
                    intra_class_counts
                        .entry(sp.to_string())
                        .or_default()
                        .push((src_leaf.to_string(), dst_leaf.to_string()));
                    intra_edge_ids.insert(edge.id.clone());
                }
            }
        }

        if !intra_edge_ids.is_empty() {
            edges.retain(|e| !intra_edge_ids.contains(&e.id));
            for node in &mut nodes {
                if let Some(calls) = intra_class_counts.get(&node.id) {
                    let call_entries: Vec<serde_json::Value> = calls
                        .iter()
                        .map(|(from, to)| serde_json::json!({"from": from, "to": to}))
                        .collect();
                    node.payload.insert(
                        "intra_class_calls".to_string(),
                        serde_json::Value::Array(call_entries),
                    );
                }
            }
        }
    }

    let files_covered: HashSet<String> = scip.covered_files.iter().cloned().collect();

    // Phase 5c: collect edge endpoints that have no node yet.
    let existing_node_ids: HashSet<&str> = nodes.iter().map(|n| n.id.as_str()).collect();
    let mut missing_ids: HashSet<String> = HashSet::new();
    for edge in &edges {
        if !existing_node_ids.contains(edge.src.as_str()) {
            missing_ids.insert(edge.src.clone());
        }
        if !existing_node_ids.contains(edge.dst.as_str()) {
            missing_ids.insert(edge.dst.clone());
        }
    }
    // Synthesize a node for each missing id; kind and label come from the id
    // prefix. Ids with an unrecognized prefix get no node.
    for missing_id in &missing_ids {
        let (kind, label) = if let Some(file_path) = missing_id.strip_prefix("file:") {
            (NodeKind::File, file_path.to_string())
        } else if let Some(pkg_rest) = missing_id.strip_prefix("pkg:") {
            let label = pkg_rest.rsplit(':').next().unwrap_or(pkg_rest).to_string();
            ext_nodes_created += 1;
            (NodeKind::External, label)
        } else if missing_id.starts_with("ext:") {
            let label = missing_id
                .rsplit(':')
                .next()
                .unwrap_or(missing_id)
                .to_string();
            ext_nodes_created += 1;
            (NodeKind::External, label)
        } else if let Some(qname) = missing_id.strip_prefix("sym:") {
            // Missing symbol nodes are created with kind Method.
            let label = qname.rsplit([':', '.']).next().unwrap_or(qname).to_string();
            (NodeKind::Method, label)
        } else {
            continue;
        };
        let mut payload = HashMap::new();
        payload.insert(
            "source".to_string(),
            serde_json::Value::String("scip".to_string()),
        );
        nodes.push(GraphNode {
            id: missing_id.clone(),
            kind,
            label,
            payload,
            centrality: 0.0,
            memory_id: None,
            namespace: ns.clone(),
            valid_from: None,
            valid_to: None,
        });
    }

    // Phase 5d: drop edges that point at stdlib packages, and edges whose
    // endpoints are neither known nodes nor `file:` / `pkg:` ids.
    let valid_node_ids: HashSet<&str> = nodes.iter().map(|n| n.id.as_str()).collect();
    let edge_count_before = edges.len();
    edges.retain(|e| {
        if is_stdlib_package(&e.dst) {
            return false;
        }
        let src_ok = valid_node_ids.contains(e.src.as_str())
            || e.src.starts_with("file:")
            || e.src.starts_with("pkg:");
        let dst_ok = valid_node_ids.contains(e.dst.as_str())
            || e.dst.starts_with("file:")
            || e.dst.starts_with("pkg:");
        src_ok && dst_ok
    });
    let edges_dropped = edge_count_before - edges.len();
    if edges_dropped > 0 {
        tracing::debug!("Dropped {edges_dropped} SCIP edges referencing filtered/stdlib nodes");
    }

    ScipBuildResult {
        nodes,
        edges,
        memories,
        ext_nodes_created,
        files_covered,
        doc_memories_created,
    }
}
811
812fn find_enclosing_def_indexed<'a>(
815 defs_by_file: &HashMap<&str, Vec<&'a ScipDefinition>>,
816 file_path: &str,
817 line: u32,
818) -> Option<&'a ScipDefinition> {
819 defs_by_file
820 .get(file_path)?
821 .iter()
822 .filter(|d| d.line_start <= line && d.line_end >= line)
823 .min_by_key(|d| d.line_end - d.line_start)
824 .copied()
825}
826
/// Returns `true` when `path` looks like a first-party source file.
///
/// A path is rejected when it:
/// * is absolute (`/...`) or escapes the project root (`..`),
/// * lies inside a dependency, cache or build-output directory,
/// * is marked as generated (`__generated__`, `.generated.`), or
/// * is a bundled/minified web asset.
///
/// Directory names are compared against whole path components, so a
/// directory such as `rebuild/` or `redist/` is not mistaken for `build/`
/// or `dist/`.
fn is_source_path(path: &str) -> bool {
    /// Directory names rejected at any depth.
    const REJECT_DIRS: &[&str] = &[
        "node_modules",
        ".venv",
        "site-packages",
        "__pycache__",
        ".gradle",
        ".m2",
        "vendor",
        "dist",
        "build",
    ];
    /// Directory name rejected only when nested below another directory,
    /// i.e. when the path contains `/go-build/`.
    const NESTED_REJECT_DIR: &str = "go-build";

    if path.starts_with('/') || path.starts_with("..") {
        return false;
    }

    // Inspect directory components only: the final component is the file name.
    let mut dirs = path.split('/');
    dirs.next_back();
    let in_rejected_dir = dirs
        .enumerate()
        .any(|(depth, dir)| REJECT_DIRS.contains(&dir) || (depth > 0 && dir == NESTED_REJECT_DIR));
    if in_rejected_dir {
        return false;
    }

    if path.contains("__generated__") || path.contains(".generated.") {
        return false;
    }

    let is_bundled_asset = path.ends_with(".bundle.js")
        || path.ends_with(".min.js")
        || path.ends_with(".min.css")
        || path.contains("/webpack_bundles/");
    !is_bundled_asset
}
866
867fn is_noise_symbol(def: &ScipDefinition, parsed: &scip::types::Symbol) -> bool {
876 if def.is_generated {
878 return true;
879 }
880
881 if parsed
883 .descriptors
884 .iter()
885 .any(|d| d.name.contains("typeLiteral"))
886 {
887 return true;
888 }
889
890 let leaf = match parsed.descriptors.last() {
891 Some(d) => d,
892 None => return false,
893 };
894
895 use scip::types::descriptor::Suffix;
896 match leaf.suffix.enum_value() {
897 Ok(Suffix::Parameter | Suffix::TypeParameter | Suffix::Local) => return true,
901 Ok(Suffix::Meta) => return true,
903 Ok(Suffix::Term) => {
905 let parent_suffix = parsed
907 .descriptors
908 .iter()
909 .rev()
910 .nth(1)
911 .and_then(|d| d.suffix.enum_value().ok());
912 if matches!(parent_suffix, Some(Suffix::Method)) {
913 return true;
914 }
915 if has_trailing_digits(&leaf.name) {
918 return true;
919 }
920 }
921 _ => {}
922 }
923
924 false
925}
926
/// Returns `true` when `name` is longer than one byte and its final
/// character is an ASCII digit.
fn has_trailing_digits(name: &str) -> bool {
    // An ASCII byte never occurs inside a multi-byte UTF-8 sequence, so
    // checking the last byte is the same as checking the last character.
    matches!(name.as_bytes(), [_, .., b'0'..=b'9'])
}
931
/// Returns `true` when `node_id` is exactly one of the package node ids
/// treated as a language standard library or built-in typing package.
fn is_stdlib_package(node_id: &str) -> bool {
    const STDLIB_PACKAGE_IDS: &[&str] = &[
        "pkg:npm:typescript",
        "pkg:npm:@types/node",
        "pkg:python:python-stdlib",
        "pkg:python:typing_extensions",
        "pkg:python:builtins",
        "pkg:maven:java.lang",
        "pkg:maven:java.util",
        "pkg:maven:java.io",
        "pkg:go:builtin",
        "pkg:go:fmt",
        "pkg:cargo:std",
        "pkg:cargo:core",
        "pkg:cargo:alloc",
    ];
    STDLIB_PACKAGE_IDS.contains(&node_id)
}
955
956fn parse_external_node_id(scip_symbol: &str) -> Option<String> {
961 let parsed = scip::symbol::parse_symbol(scip_symbol).ok()?;
962 let package = parsed.package.as_ref()?;
963 if package.manager.is_empty() || package.name.is_empty() {
964 return None;
965 }
966 Some(format!("pkg:{}:{}", package.manager, package.name))
967}
968
969fn extract_containment_chain_from_parsed(parsed: &scip::types::Symbol) -> Vec<(String, NodeKind)> {
976 let scheme = &parsed.scheme;
978 let sep = if scheme == "rust-analyzer" || scheme == "lsif-clang" {
979 "::"
980 } else {
981 "."
982 };
983
984 let mut chain = Vec::new();
985 let mut cumulative_parts: Vec<&str> = Vec::new();
986 let leaf_kind = super::infer_kind_from_parsed(parsed);
987
988 for desc in &parsed.descriptors {
989 if desc.name.is_empty() {
990 continue;
991 }
992 cumulative_parts.push(&desc.name);
993 let qname = cumulative_parts.join(sep);
994 let seg_kind = if cumulative_parts.len() < parsed.descriptors.len() {
996 use scip::types::descriptor::Suffix;
997 match desc.suffix.enum_value() {
998 Ok(Suffix::Package | Suffix::Namespace) => NodeKind::Module,
999 Ok(Suffix::Type) => NodeKind::Class,
1000 Ok(Suffix::Method) => NodeKind::Method,
1001 Ok(Suffix::Macro) => NodeKind::Macro,
1002 _ => NodeKind::Module,
1003 }
1004 } else {
1005 leaf_kind
1006 };
1007 chain.push((qname, seg_kind));
1008 }
1009
1010 chain
1011}
1012
/// Returns `true` when the qualified name contains the two-character
/// sequence `'*`.
fn is_wildcard_module(qualified_name: &str) -> bool {
    // Both characters are ASCII, so a byte-window scan matches exactly the
    // positions a substring search would.
    qualified_name
        .as_bytes()
        .windows(2)
        .any(|pair| pair == b"'*")
}
1023
1024const SCIP_BASE_CONFIDENCE: f64 = 0.15;
1028
1029fn scip_edge_properties() -> HashMap<String, serde_json::Value> {
1030 use std::sync::LazyLock;
1031 static PROPS: LazyLock<HashMap<String, serde_json::Value>> = LazyLock::new(|| {
1032 let mut props = HashMap::new();
1033 props.insert(
1034 "source".to_string(),
1035 serde_json::Value::String("scip".to_string()),
1036 );
1037 props.insert(
1038 "confidence".to_string(),
1039 serde_json::json!(SCIP_BASE_CONFIDENCE),
1040 );
1041 props.insert("source_layers".to_string(), serde_json::json!(["scip"]));
1042 props
1043 });
1044 PROPS.clone()
1045}
1046
// Unit tests live in a separate file and are compiled only for test builds.
#[cfg(test)]
#[path = "../tests/scip_graph_builder_tests.rs"]
mod tests;