use perl_semantic_facts::{EdgeKind, EntityId, FileId, OccurrenceKind, ReferenceEdge};
use std::collections::HashMap;
use crate::workspace::workspace_index::FileFactShard;
/// Lookup tables for the non-definition occurrences ("references") found in
/// indexed file shards.
///
/// Each reference is stored once under its symbol key and once under every
/// entity listed in its target candidates, so it can be found by either.
#[derive(Debug, Default)]
pub struct ReferenceIndex {
/// References grouped by symbol key: the canonical name of the occurrence's
/// entity when that entity is present in the same shard, otherwise the
/// `__unresolved_anchor_<anchor id>` fallback key.
references_by_name: HashMap<String, Vec<ReferenceEdge>>,
/// References grouped by each candidate target entity.
references_by_entity: HashMap<EntityId, Vec<ReferenceEdge>>,
/// Source URI -> file id recorded by `add_file`; `remove_file` uses it to
/// find which file id's references to drop.
indexed_files: HashMap<String, FileId>,
}
impl ReferenceIndex {
    /// Creates an empty index.
    pub fn new() -> Self {
        Self::default()
    }

    /// Indexes every non-definition occurrence of `shard`.
    ///
    /// For each such occurrence a [`ReferenceEdge`] is recorded:
    ///
    /// * under its symbol key (see `derive_symbol_key`), and
    /// * under every target candidate. Candidates are the targets of the
    ///   shard's `References` edges that go via the occurrence; when there are
    ///   none, the occurrence's own `entity_id` (if any) is used instead.
    ///
    /// If `shard.source_uri` is already indexed, its previous references are
    /// removed first, so calling this again for the same URI replaces the old
    /// entries instead of duplicating them.
    pub fn add_file(&mut self, shard: &FileFactShard) {
        // Without this, re-adding a URI would duplicate its references, and if
        // the file id changed the old ones could never be removed again because
        // the URI -> file id mapping below gets overwritten.
        if self.indexed_files.contains_key(&shard.source_uri) {
            self.remove_file(&shard.source_uri);
        }
        self.indexed_files.insert(shard.source_uri.clone(), shard.file_id);

        // Entity id -> canonical name, built once so that resolving a symbol
        // key is a hash lookup rather than a scan of `shard.entities` per
        // occurrence. The first entity with a given id wins, matching a
        // front-to-back search.
        let mut entity_names: HashMap<EntityId, &str> =
            HashMap::with_capacity(shard.entities.len());
        for entity in &shard.entities {
            entity_names.entry(entity.id).or_insert(entity.canonical_name.as_str());
        }

        // Occurrence id -> target entities of the `References` edges via it.
        let mut edge_targets: HashMap<u64, Vec<EntityId>> = HashMap::new();
        for edge in &shard.edges {
            if edge.kind == EdgeKind::References {
                if let Some(occ_id) = edge.via_occurrence_id {
                    let targets = edge_targets.entry(occ_id.0).or_default();
                    // Several edges may point at the same entity; keep one so
                    // the reference is not stored twice for that entity.
                    if !targets.contains(&edge.to_entity_id) {
                        targets.push(edge.to_entity_id);
                    }
                }
            }
        }

        for occ in &shard.occurrences {
            if occ.kind == OccurrenceKind::Definition {
                continue;
            }

            let target_candidates: Vec<EntityId> = match edge_targets.get(&occ.id.0) {
                Some(targets) => targets.clone(),
                None => occ.entity_id.into_iter().collect(),
            };
            let symbol_key = Self::derive_symbol_key(&entity_names, occ);
            let ref_edge = ReferenceEdge::new(
                occ.id,
                occ.anchor_id,
                shard.file_id,
                symbol_key.clone(),
                target_candidates.clone(),
                occ.kind,
                occ.provenance,
                occ.confidence,
            );

            for entity_id in &target_candidates {
                self.references_by_entity.entry(*entity_id).or_default().push(ref_edge.clone());
            }
            // Stored last so the edge can be moved instead of cloned once more.
            self.references_by_name.entry(symbol_key).or_default().push(ref_edge);
        }
    }

    /// Removes every reference recorded for the file indexed under `source_uri`.
    ///
    /// Does nothing when the URI is not currently indexed. Keys whose reference
    /// list becomes empty are dropped from both maps.
    pub fn remove_file(&mut self, source_uri: &str) {
        let file_id = match self.indexed_files.remove(source_uri) {
            Some(id) => id,
            None => return,
        };

        // Filter each list and drop emptied keys in the same pass.
        self.references_by_name.retain(|_, refs| {
            refs.retain(|r| r.file_id != file_id);
            !refs.is_empty()
        });
        self.references_by_entity.retain(|_, refs| {
            refs.retain(|r| r.file_id != file_id);
            !refs.is_empty()
        });
    }

    /// Returns the references stored under `symbol_key`, or an empty slice.
    pub fn get_by_name(&self, symbol_key: &str) -> &[ReferenceEdge] {
        self.references_by_name.get(symbol_key).map(Vec::as_slice).unwrap_or_default()
    }

    /// Returns the references that list `entity_id` as a target candidate, or
    /// an empty slice.
    pub fn get_by_entity(&self, entity_id: EntityId) -> &[ReferenceEdge] {
        self.references_by_entity.get(&entity_id).map(Vec::as_slice).unwrap_or_default()
    }

    /// Number of distinct symbol keys that currently have references.
    pub fn name_count(&self) -> usize {
        self.references_by_name.len()
    }

    /// Number of distinct entities that currently have references.
    pub fn entity_count(&self) -> usize {
        self.references_by_entity.len()
    }

    /// Computes the key under which `occ` is stored in the by-name map.
    ///
    /// This is the canonical name of the occurrence's entity when that entity
    /// appears in `entity_names` (built from the shard being indexed);
    /// otherwise the fallback `__unresolved_anchor_<anchor id>`.
    fn derive_symbol_key(
        entity_names: &HashMap<EntityId, &str>,
        occ: &perl_semantic_facts::OccurrenceFact,
    ) -> String {
        occ.entity_id
            .and_then(|entity_id| entity_names.get(&entity_id))
            .map(|name| (*name).to_string())
            .unwrap_or_else(|| format!("__unresolved_anchor_{}", occ.anchor_id.0))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use perl_semantic_facts::{
AnchorFact, AnchorId, Confidence, EdgeFact, EdgeId, EntityFact, EntityKind, OccurrenceFact,
OccurrenceId, Provenance, ScopeId,
};
// Fixture shared by several tests: `file:///lib/Foo.pm` (file id 1) defining
// the subroutine `Foo::bar` (entity 100) and calling it once (occurrence 400),
// with a `References` edge to the entity going via that occurrence.
fn sample_shard() -> FileFactShard {
    let file_id = FileId(1);
    let target = EntityId(100);
    let def_anchor = AnchorId(10);
    let call_anchor = AnchorId(20);
    let call_occ = OccurrenceId(400);

    let anchors = vec![
        AnchorFact {
            id: def_anchor,
            file_id,
            span_start_byte: 0,
            span_end_byte: 10,
            scope_id: None,
            provenance: Provenance::ExactAst,
            confidence: Confidence::High,
        },
        AnchorFact {
            id: call_anchor,
            file_id,
            span_start_byte: 50,
            span_end_byte: 55,
            scope_id: None,
            provenance: Provenance::ExactAst,
            confidence: Confidence::High,
        },
    ];

    let entities = vec![EntityFact {
        id: target,
        kind: EntityKind::Subroutine,
        canonical_name: "Foo::bar".to_string(),
        anchor_id: Some(def_anchor),
        scope_id: Some(ScopeId(1)),
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];

    let occurrences = vec![OccurrenceFact {
        id: call_occ,
        kind: OccurrenceKind::Call,
        entity_id: Some(target),
        anchor_id: call_anchor,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];

    let edges = vec![EdgeFact {
        id: EdgeId(500),
        kind: EdgeKind::References,
        from_entity_id: EntityId(0),
        to_entity_id: target,
        via_occurrence_id: Some(call_occ),
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];

    FileFactShard {
        source_uri: "file:///lib/Foo.pm".to_string(),
        file_id,
        content_hash: 999,
        anchors_hash: None,
        entities_hash: None,
        occurrences_hash: None,
        edges_hash: None,
        anchors,
        entities,
        occurrences,
        edges,
    }
}
// Indexing the fixture makes its single call findable by canonical name.
#[test]
fn add_file_populates_name_index() -> Result<(), Box<dyn std::error::Error>> {
    let shard = sample_shard();
    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&shard);

    let by_name = ref_index.get_by_name("Foo::bar");
    assert_eq!(by_name.len(), 1);

    let only = &by_name[0];
    assert_eq!(only.kind, OccurrenceKind::Call);
    assert_eq!(only.symbol_key, "Foo::bar");
    Ok(())
}
// Indexing the fixture makes its single call findable by target entity.
#[test]
fn add_file_populates_entity_index() -> Result<(), Box<dyn std::error::Error>> {
    let shard = sample_shard();
    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&shard);

    let by_entity = ref_index.get_by_entity(EntityId(100));
    assert_eq!(by_entity.len(), 1);
    assert_eq!(by_entity[0].occurrence_id, OccurrenceId(400));
    Ok(())
}
// Removing the only indexed file leaves both maps empty.
#[test]
fn remove_file_clears_entries() -> Result<(), Box<dyn std::error::Error>> {
    let shard = sample_shard();
    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&shard);

    // Before removal: one symbol key and one entity.
    assert_eq!(ref_index.name_count(), 1);
    assert_eq!(ref_index.entity_count(), 1);

    ref_index.remove_file("file:///lib/Foo.pm");

    // After removal: nothing left, neither as counts nor via lookups.
    assert_eq!(ref_index.name_count(), 0);
    assert_eq!(ref_index.entity_count(), 0);
    assert!(ref_index.get_by_name("Foo::bar").is_empty());
    assert!(ref_index.get_by_entity(EntityId(100)).is_empty());
    Ok(())
}
// Removing the same file a second time is harmless.
#[test]
fn remove_file_is_idempotent() -> Result<(), Box<dyn std::error::Error>> {
    let shard = sample_shard();
    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&shard);

    for _ in 0..2 {
        ref_index.remove_file("file:///lib/Foo.pm");
    }

    assert_eq!(ref_index.name_count(), 0);
    assert_eq!(ref_index.entity_count(), 0);
    Ok(())
}
// Removing a URI that was never indexed leaves existing entries untouched.
#[test]
fn remove_unknown_file_is_noop() -> Result<(), Box<dyn std::error::Error>> {
    let shard = sample_shard();
    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&shard);

    ref_index.remove_file("file:///nonexistent.pm");

    assert_eq!(ref_index.name_count(), 1);
    assert_eq!(ref_index.entity_count(), 1);
    Ok(())
}
// A shard whose only occurrence is a definition contributes no references.
#[test]
fn definition_occurrences_are_excluded() -> Result<(), Box<dyn std::error::Error>> {
    let file_id = FileId(2);
    let defined = EntityId(200);
    let def_anchor = AnchorId(30);

    let anchors = vec![AnchorFact {
        id: def_anchor,
        file_id,
        span_start_byte: 0,
        span_end_byte: 5,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let entities = vec![EntityFact {
        id: defined,
        kind: EntityKind::Subroutine,
        canonical_name: "Defs::init".to_string(),
        anchor_id: Some(def_anchor),
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let occurrences = vec![OccurrenceFact {
        id: OccurrenceId(600),
        kind: OccurrenceKind::Definition,
        entity_id: Some(defined),
        anchor_id: def_anchor,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let defs_shard = FileFactShard {
        source_uri: "file:///lib/Defs.pm".to_string(),
        file_id,
        content_hash: 111,
        anchors_hash: None,
        entities_hash: None,
        occurrences_hash: None,
        edges_hash: None,
        anchors,
        entities,
        occurrences,
        edges: Vec::new(),
    };

    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&defs_shard);

    assert_eq!(ref_index.name_count(), 0);
    assert_eq!(ref_index.entity_count(), 0);
    Ok(())
}
// Two files referencing the same entity are indexed side by side, and removing
// one of them leaves the other's reference in place.
#[test]
fn multiple_files_coexist() -> Result<(), Box<dyn std::error::Error>> {
    let foo_shard = sample_shard();

    let bar_file = FileId(2);
    let shared_entity = EntityId(100);
    let bar_occ = OccurrenceId(700);
    let bar_anchor = AnchorId(40);

    let anchors = vec![AnchorFact {
        id: bar_anchor,
        file_id: bar_file,
        span_start_byte: 10,
        span_end_byte: 18,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let entities = vec![EntityFact {
        id: shared_entity,
        kind: EntityKind::Subroutine,
        canonical_name: "Foo::bar".to_string(),
        anchor_id: None,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let occurrences = vec![OccurrenceFact {
        id: bar_occ,
        kind: OccurrenceKind::Call,
        entity_id: Some(shared_entity),
        anchor_id: bar_anchor,
        scope_id: None,
        provenance: Provenance::NameHeuristic,
        confidence: Confidence::Medium,
    }];
    let bar_shard = FileFactShard {
        source_uri: "file:///lib/Bar.pm".to_string(),
        file_id: bar_file,
        content_hash: 888,
        anchors_hash: None,
        entities_hash: None,
        occurrences_hash: None,
        edges_hash: None,
        anchors,
        entities,
        occurrences,
        edges: Vec::new(),
    };

    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&foo_shard);
    ref_index.add_file(&bar_shard);

    // Both files contribute one reference each.
    assert_eq!(ref_index.get_by_name("Foo::bar").len(), 2);
    assert_eq!(ref_index.get_by_entity(shared_entity).len(), 2);

    ref_index.remove_file("file:///lib/Foo.pm");

    // Only the reference from Bar.pm survives.
    assert_eq!(ref_index.get_by_name("Foo::bar").len(), 1);
    assert_eq!(ref_index.get_by_entity(shared_entity).len(), 1);
    assert_eq!(ref_index.get_by_name("Foo::bar")[0].file_id, bar_file);
    Ok(())
}
// Remove-then-add for the same URI swaps the old reference for the new one.
#[test]
fn incremental_reindex_replaces_entries() -> Result<(), Box<dyn std::error::Error>> {
    let mut ref_index = ReferenceIndex::new();
    let original = sample_shard();
    ref_index.add_file(&original);
    assert_eq!(ref_index.get_by_name("Foo::bar").len(), 1);

    ref_index.remove_file("file:///lib/Foo.pm");

    // New version of the same file: one read of `Foo::bar` instead of a call.
    let file_id = FileId(1);
    let target = EntityId(100);
    let anchors = vec![AnchorFact {
        id: AnchorId(50),
        file_id,
        span_start_byte: 60,
        span_end_byte: 68,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let entities = vec![EntityFact {
        id: target,
        kind: EntityKind::Subroutine,
        canonical_name: "Foo::bar".to_string(),
        anchor_id: None,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let occurrences = vec![OccurrenceFact {
        id: OccurrenceId(800),
        kind: OccurrenceKind::Read,
        entity_id: Some(target),
        anchor_id: AnchorId(50),
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let replacement = FileFactShard {
        source_uri: "file:///lib/Foo.pm".to_string(),
        file_id,
        content_hash: 1000,
        anchors_hash: None,
        entities_hash: None,
        occurrences_hash: None,
        edges_hash: None,
        anchors,
        entities,
        occurrences,
        edges: Vec::new(),
    };
    ref_index.add_file(&replacement);

    let by_name = ref_index.get_by_name("Foo::bar");
    assert_eq!(by_name.len(), 1);
    assert_eq!(by_name[0].occurrence_id, OccurrenceId(800));
    assert_eq!(by_name[0].kind, OccurrenceKind::Read);
    Ok(())
}
// An occurrence without an entity is stored under the anchor-based fallback
// key and adds nothing to the by-entity map.
#[test]
fn unresolved_occurrence_uses_fallback_key() -> Result<(), Box<dyn std::error::Error>> {
    let file_id = FileId(3);
    let call_anchor = AnchorId(60);

    let anchors = vec![AnchorFact {
        id: call_anchor,
        file_id,
        span_start_byte: 0,
        span_end_byte: 8,
        scope_id: None,
        provenance: Provenance::NameHeuristic,
        confidence: Confidence::Low,
    }];
    let occurrences = vec![OccurrenceFact {
        id: OccurrenceId(900),
        kind: OccurrenceKind::Call,
        entity_id: None,
        anchor_id: call_anchor,
        scope_id: None,
        provenance: Provenance::NameHeuristic,
        confidence: Confidence::Low,
    }];
    let unresolved_shard = FileFactShard {
        source_uri: "file:///lib/Unresolved.pm".to_string(),
        file_id,
        content_hash: 222,
        anchors_hash: None,
        entities_hash: None,
        occurrences_hash: None,
        edges_hash: None,
        anchors,
        entities: Vec::new(),
        occurrences,
        edges: Vec::new(),
    };

    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&unresolved_shard);

    let by_fallback = ref_index.get_by_name("__unresolved_anchor_60");
    assert_eq!(by_fallback.len(), 1);
    assert_eq!(by_fallback[0].confidence, Confidence::Low);
    assert_eq!(ref_index.entity_count(), 0);
    Ok(())
}
// The fixture's `References` edge supplies the reference's target candidates.
#[test]
fn edge_targets_populate_candidates() -> Result<(), Box<dyn std::error::Error>> {
    let shard = sample_shard();
    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&shard);

    let by_entity = ref_index.get_by_entity(EntityId(100));
    assert_eq!(by_entity.len(), 1);
    assert_eq!(by_entity[0].target_candidates, vec![EntityId(100)]);
    Ok(())
}
// One occurrence with `References` edges to two entities is indexed under both
// entities, and once under the shared name, each time carrying both candidates.
#[test]
fn multiple_edge_targets_produce_multiple_entity_entries()
-> Result<(), Box<dyn std::error::Error>> {
    let file_id = FileId(4);
    let call_occ = OccurrenceId(1000);
    let call_anchor = AnchorId(70);
    let first_target = EntityId(300);
    let second_target = EntityId(301);

    let anchors = vec![AnchorFact {
        id: call_anchor,
        file_id,
        span_start_byte: 0,
        span_end_byte: 5,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::High,
    }];
    let entities = vec![
        EntityFact {
            id: first_target,
            kind: EntityKind::Subroutine,
            canonical_name: "ambig_func".to_string(),
            anchor_id: None,
            scope_id: None,
            provenance: Provenance::ExactAst,
            confidence: Confidence::High,
        },
        EntityFact {
            id: second_target,
            kind: EntityKind::Subroutine,
            canonical_name: "ambig_func".to_string(),
            anchor_id: None,
            scope_id: None,
            provenance: Provenance::NameHeuristic,
            confidence: Confidence::Low,
        },
    ];
    let occurrences = vec![OccurrenceFact {
        id: call_occ,
        kind: OccurrenceKind::Call,
        entity_id: Some(first_target),
        anchor_id: call_anchor,
        scope_id: None,
        provenance: Provenance::ExactAst,
        confidence: Confidence::Medium,
    }];
    let edges = vec![
        EdgeFact {
            id: EdgeId(1001),
            kind: EdgeKind::References,
            from_entity_id: EntityId(0),
            to_entity_id: first_target,
            via_occurrence_id: Some(call_occ),
            provenance: Provenance::ExactAst,
            confidence: Confidence::High,
        },
        EdgeFact {
            id: EdgeId(1002),
            kind: EdgeKind::References,
            from_entity_id: EntityId(0),
            to_entity_id: second_target,
            via_occurrence_id: Some(call_occ),
            provenance: Provenance::NameHeuristic,
            confidence: Confidence::Low,
        },
    ];
    let ambiguous_shard = FileFactShard {
        source_uri: "file:///lib/Ambiguous.pm".to_string(),
        file_id,
        content_hash: 333,
        anchors_hash: None,
        entities_hash: None,
        occurrences_hash: None,
        edges_hash: None,
        anchors,
        entities,
        occurrences,
        edges,
    };

    let mut ref_index = ReferenceIndex::new();
    ref_index.add_file(&ambiguous_shard);

    let via_first = ref_index.get_by_entity(first_target);
    assert_eq!(via_first.len(), 1);
    assert_eq!(via_first[0].target_candidates.len(), 2);

    let via_second = ref_index.get_by_entity(second_target);
    assert_eq!(via_second.len(), 1);
    assert_eq!(via_second[0].target_candidates.len(), 2);

    let via_name = ref_index.get_by_name("ambig_func");
    assert_eq!(via_name.len(), 1);
    assert_eq!(via_name[0].target_candidates.len(), 2);
    Ok(())
}
}