Skip to main content

agentic_memory/v3/indexes/
entity.rs

1//! Inverted index for entity mentions. O(1) lookup by entity.
2
3use super::{Index, IndexResult};
4use crate::v3::block::{Block, BlockContent, BlockHash};
5use std::collections::{HashMap, HashSet};
6
/// Category attached to an indexed entity.
///
/// While indexing blocks, this file only ever assigns `File`, `Directory`, and `Tool`;
/// the remaining variants are available to callers that register mentions explicitly
/// through `EntityIndex::add_mention`.
///
/// The type is `Hash` so it can be used as a `HashMap` key or `HashSet` member
/// (for example, to group entities by category).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntityType {
    /// A file path: the path of a file block, or a slash-containing word found in text.
    File,
    /// A parent directory derived from a file block's path.
    Directory,
    /// A person. Not assigned by the indexing code in this file.
    Person,
    /// A project. Not assigned by the indexing code in this file.
    Project,
    /// A tool, identified by the tool name of a tool block.
    Tool,
    /// A concept. Not assigned by the indexing code in this file.
    Concept,
    /// Anything that fits none of the other categories.
    Other,
}
18
19/// Inverted index for entity mentions.
20pub struct EntityIndex {
21    /// Entity -> blocks that mention it
22    by_entity: HashMap<String, HashSet<u64>>,
23
24    /// Block hashes
25    hashes: HashMap<u64, BlockHash>,
26
27    /// Entity types for categorization
28    entity_types: HashMap<String, EntityType>,
29}
30
31impl EntityIndex {
32    pub fn new() -> Self {
33        Self {
34            by_entity: HashMap::new(),
35            hashes: HashMap::new(),
36            entity_types: HashMap::new(),
37        }
38    }
39
40    /// Add entity mention
41    pub fn add_mention(&mut self, entity: &str, sequence: u64, entity_type: EntityType) {
42        self.by_entity
43            .entry(entity.to_string())
44            .or_default()
45            .insert(sequence);
46        self.entity_types.insert(entity.to_string(), entity_type);
47    }
48
49    /// Query blocks mentioning an entity
50    pub fn query_entity(&self, entity: &str) -> Vec<IndexResult> {
51        self.by_entity
52            .get(entity)
53            .map(|sequences| {
54                sequences
55                    .iter()
56                    .filter_map(|&seq| {
57                        self.hashes.get(&seq).map(|&hash| IndexResult {
58                            block_sequence: seq,
59                            block_hash: hash,
60                            score: 1.0,
61                        })
62                    })
63                    .collect()
64            })
65            .unwrap_or_default()
66    }
67
68    /// Query blocks mentioning entities matching a prefix
69    pub fn query_prefix(&self, prefix: &str) -> Vec<IndexResult> {
70        let mut sequences = HashSet::new();
71
72        for (entity, seqs) in &self.by_entity {
73            if entity.starts_with(prefix) {
74                sequences.extend(seqs);
75            }
76        }
77
78        sequences
79            .iter()
80            .filter_map(|&seq| {
81                self.hashes.get(&seq).map(|&hash| IndexResult {
82                    block_sequence: seq,
83                    block_hash: hash,
84                    score: 1.0,
85                })
86            })
87            .collect()
88    }
89
90    /// Get all entities of a type
91    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<String> {
92        self.entity_types
93            .iter()
94            .filter(|(_, &t)| t == entity_type)
95            .map(|(e, _)| e.clone())
96            .collect()
97    }
98
99    /// Get all files mentioned
100    pub fn get_all_files(&self) -> Vec<String> {
101        self.get_entities_by_type(EntityType::File)
102    }
103
104    /// Get indexed entity count
105    pub fn len(&self) -> usize {
106        self.by_entity.len()
107    }
108
109    /// Check if empty
110    pub fn is_empty(&self) -> bool {
111        self.by_entity.is_empty()
112    }
113}
114
115impl Default for EntityIndex {
116    fn default() -> Self {
117        Self::new()
118    }
119}
120
121impl Index for EntityIndex {
122    fn index(&mut self, block: &Block) {
123        self.hashes.insert(block.sequence, block.hash);
124
125        match &block.content {
126            BlockContent::File { path, .. } => {
127                self.add_mention(path, block.sequence, EntityType::File);
128
129                // Also index parent directories
130                let parts: Vec<&str> = path.split('/').collect();
131                for i in 1..parts.len() {
132                    let dir = parts[..i].join("/");
133                    self.add_mention(&dir, block.sequence, EntityType::Directory);
134                }
135            }
136            BlockContent::Tool { tool_name, .. } => {
137                self.add_mention(tool_name, block.sequence, EntityType::Tool);
138            }
139            BlockContent::Text { text, .. } => {
140                // Extract file paths mentioned in text
141                for word in text.split_whitespace() {
142                    if word.contains('/') && !word.starts_with("http") {
143                        self.add_mention(word, block.sequence, EntityType::File);
144                    }
145                }
146            }
147            _ => {}
148        }
149    }
150
151    fn remove(&mut self, sequence: u64) {
152        self.hashes.remove(&sequence);
153        for sequences in self.by_entity.values_mut() {
154            sequences.remove(&sequence);
155        }
156    }
157
158    fn rebuild(&mut self, blocks: impl Iterator<Item = Block>) {
159        self.by_entity.clear();
160        self.hashes.clear();
161        self.entity_types.clear();
162        for block in blocks {
163            self.index(&block);
164        }
165    }
166}