Skip to main content

uni_store/storage/
vid_labels.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4//! VID-to-Labels index for the new storage model.
5//!
6//! In the new storage design, VIDs no longer embed label information.
7//! This index provides efficient lookups from VID to labels and vice versa.
8//! It's rebuilt from the main vertex table on startup.
9
10use std::collections::{HashMap, HashSet};
11use uni_common::core::id::Vid;
12
13/// In-memory index mapping VIDs to their labels.
14///
15/// This index is rebuilt from the main vertex table on database open.
16/// It provides O(1) lookups for:
17/// - VID → labels (which labels does this vertex have?)
18/// - label → VIDs (which vertices have this label?)
19#[derive(Debug, Clone, Default)]
20pub struct VidLabelsIndex {
21    /// VID to labels mapping
22    vid_to_labels: HashMap<Vid, Vec<String>>,
23    /// Label to VIDs mapping (for label scans)
24    label_to_vids: HashMap<String, HashSet<Vid>>,
25}
26
27impl VidLabelsIndex {
28    /// Creates a new empty index.
29    pub fn new() -> Self {
30        Self {
31            vid_to_labels: HashMap::new(),
32            label_to_vids: HashMap::new(),
33        }
34    }
35
36    /// Creates an index with pre-allocated capacity.
37    pub fn with_capacity(num_vertices: usize, num_labels: usize) -> Self {
38        Self {
39            vid_to_labels: HashMap::with_capacity(num_vertices),
40            label_to_vids: HashMap::with_capacity(num_labels),
41        }
42    }
43
44    /// Inserts a VID with its labels.
45    ///
46    /// If the VID already exists, its labels are replaced.
47    pub fn insert(&mut self, vid: Vid, labels: Vec<String>) {
48        // Remove old label mappings if this VID exists
49        if let Some(old_labels) = self.vid_to_labels.get(&vid) {
50            for label in old_labels {
51                if let Some(vids) = self.label_to_vids.get_mut(label) {
52                    vids.remove(&vid);
53                }
54            }
55        }
56
57        // Add new label mappings
58        for label in &labels {
59            self.label_to_vids
60                .entry(label.clone())
61                .or_default()
62                .insert(vid);
63        }
64
65        self.vid_to_labels.insert(vid, labels);
66    }
67
68    /// Adds a label to an existing VID.
69    ///
70    /// If the VID doesn't exist, creates it with just this label.
71    pub fn add_label(&mut self, vid: Vid, label: String) {
72        // Add to vid_to_labels
73        let labels = self.vid_to_labels.entry(vid).or_default();
74        if !labels.contains(&label) {
75            labels.push(label.clone());
76        }
77
78        // Add to label_to_vids
79        self.label_to_vids.entry(label).or_default().insert(vid);
80    }
81
82    /// Removes a label from a VID.
83    ///
84    /// Returns true if the label was removed, false if it wasn't present.
85    pub fn remove_label(&mut self, vid: Vid, label: &str) -> bool {
86        let removed = if let Some(labels) = self.vid_to_labels.get_mut(&vid) {
87            if let Some(pos) = labels.iter().position(|l| l == label) {
88                labels.remove(pos);
89                true
90            } else {
91                false
92            }
93        } else {
94            false
95        };
96
97        if removed && let Some(vids) = self.label_to_vids.get_mut(label) {
98            vids.remove(&vid);
99        }
100
101        removed
102    }
103
104    /// Removes a VID entirely from the index.
105    pub fn remove_vid(&mut self, vid: Vid) {
106        if let Some(labels) = self.vid_to_labels.remove(&vid) {
107            for label in labels {
108                if let Some(vids) = self.label_to_vids.get_mut(&label) {
109                    vids.remove(&vid);
110                }
111            }
112        }
113    }
114
115    /// Gets the labels for a VID.
116    pub fn get_labels(&self, vid: Vid) -> Option<&[String]> {
117        self.vid_to_labels.get(&vid).map(|v| v.as_slice())
118    }
119
120    /// Checks if a VID has a specific label.
121    pub fn has_label(&self, vid: Vid, label: &str) -> bool {
122        self.vid_to_labels
123            .get(&vid)
124            .map(|labels| labels.iter().any(|l| l == label))
125            .unwrap_or(false)
126    }
127
128    /// Checks if a VID has all the specified labels.
129    pub fn has_all_labels(&self, vid: Vid, required_labels: &[&str]) -> bool {
130        if let Some(labels) = self.vid_to_labels.get(&vid) {
131            required_labels
132                .iter()
133                .all(|req| labels.iter().any(|l| l == *req))
134        } else {
135            false
136        }
137    }
138
139    /// Gets all VIDs with a specific label.
140    pub fn get_vids_with_label(&self, label: &str) -> Option<&HashSet<Vid>> {
141        self.label_to_vids.get(label)
142    }
143
144    /// Gets all VIDs that have ALL the specified labels.
145    pub fn get_vids_with_all_labels(&self, labels: &[&str]) -> HashSet<Vid> {
146        if labels.is_empty() {
147            return HashSet::new();
148        }
149
150        // Start with the smallest set for efficiency
151        let mut sets: Vec<_> = labels
152            .iter()
153            .filter_map(|label| self.label_to_vids.get(*label))
154            .collect();
155
156        if sets.is_empty() {
157            return HashSet::new();
158        }
159
160        // Sort by size (smallest first)
161        sets.sort_by_key(|s| s.len());
162
163        // Intersect all sets
164        let mut result = sets[0].clone();
165        for set in sets.iter().skip(1) {
166            result.retain(|vid| set.contains(vid));
167            if result.is_empty() {
168                break;
169            }
170        }
171
172        result
173    }
174
175    /// Returns the total number of vertices in the index.
176    pub fn len(&self) -> usize {
177        self.vid_to_labels.len()
178    }
179
180    /// Returns true if the index is empty.
181    pub fn is_empty(&self) -> bool {
182        self.vid_to_labels.is_empty()
183    }
184
185    /// Returns the number of distinct labels.
186    pub fn label_count(&self) -> usize {
187        self.label_to_vids.len()
188    }
189
190    /// Returns an iterator over all known labels.
191    pub fn labels(&self) -> impl Iterator<Item = &str> {
192        self.label_to_vids.keys().map(|s| s.as_str())
193    }
194
195    /// Returns an iterator over all (VID, labels) pairs.
196    pub fn iter(&self) -> impl Iterator<Item = (Vid, &[String])> {
197        self.vid_to_labels
198            .iter()
199            .map(|(&vid, labels)| (vid, labels.as_slice()))
200    }
201
202    /// Clears the index.
203    pub fn clear(&mut self) {
204        self.vid_to_labels.clear();
205        self.label_to_vids.clear();
206    }
207
208    /// Approximate memory usage in bytes.
209    pub fn memory_usage(&self) -> usize {
210        let vid_to_labels_size = self.vid_to_labels.capacity()
211            * (std::mem::size_of::<Vid>() + std::mem::size_of::<Vec<String>>());
212        let label_to_vids_size = self.label_to_vids.capacity()
213            * (std::mem::size_of::<String>() + std::mem::size_of::<HashSet<Vid>>());
214        vid_to_labels_size + label_to_vids_size
215    }
216}
217
218/// Index mapping edge types to their EIDs.
219///
220/// Similar to VidLabelsIndex but for edges. Since edges have a single type
221/// (not multi-label like vertices), this is simpler.
222#[derive(Debug, Clone, Default)]
223pub struct EidTypeIndex {
224    /// EID to type mapping
225    eid_to_type: HashMap<uni_common::core::id::Eid, String>,
226    /// Type to EIDs mapping
227    type_to_eids: HashMap<String, HashSet<uni_common::core::id::Eid>>,
228}
229
230impl EidTypeIndex {
231    pub fn new() -> Self {
232        Self {
233            eid_to_type: HashMap::new(),
234            type_to_eids: HashMap::new(),
235        }
236    }
237
238    pub fn insert(&mut self, eid: uni_common::core::id::Eid, edge_type: String) {
239        // Remove old type mapping if this EID exists
240        if let Some(old_type) = self.eid_to_type.get(&eid)
241            && let Some(eids) = self.type_to_eids.get_mut(old_type)
242        {
243            eids.remove(&eid);
244        }
245
246        self.type_to_eids
247            .entry(edge_type.clone())
248            .or_default()
249            .insert(eid);
250        self.eid_to_type.insert(eid, edge_type);
251    }
252
253    pub fn get_type(&self, eid: uni_common::core::id::Eid) -> Option<&str> {
254        self.eid_to_type.get(&eid).map(|s| s.as_str())
255    }
256
257    pub fn get_eids_with_type(
258        &self,
259        edge_type: &str,
260    ) -> Option<&HashSet<uni_common::core::id::Eid>> {
261        self.type_to_eids.get(edge_type)
262    }
263
264    pub fn len(&self) -> usize {
265        self.eid_to_type.len()
266    }
267
268    pub fn is_empty(&self) -> bool {
269        self.eid_to_type.is_empty()
270    }
271}
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276
277    #[test]
278    fn test_vid_labels_basic() {
279        let mut index = VidLabelsIndex::new();
280
281        let vid1 = Vid::new(1);
282        let vid2 = Vid::new(2);
283        let vid3 = Vid::new(3);
284
285        index.insert(vid1, vec!["Person".to_string()]);
286        index.insert(vid2, vec!["Person".to_string(), "Employee".to_string()]);
287        index.insert(vid3, vec!["Company".to_string()]);
288
289        assert_eq!(index.get_labels(vid1), Some(&["Person".to_string()][..]));
290        assert_eq!(
291            index.get_labels(vid2),
292            Some(&["Person".to_string(), "Employee".to_string()][..])
293        );
294        assert!(index.has_label(vid1, "Person"));
295        assert!(!index.has_label(vid1, "Employee"));
296        assert!(index.has_all_labels(vid2, &["Person", "Employee"]));
297    }
298
299    #[test]
300    fn test_get_vids_with_label() {
301        let mut index = VidLabelsIndex::new();
302
303        index.insert(Vid::new(1), vec!["Person".to_string()]);
304        index.insert(
305            Vid::new(2),
306            vec!["Person".to_string(), "Employee".to_string()],
307        );
308        index.insert(Vid::new(3), vec!["Company".to_string()]);
309
310        let persons = index.get_vids_with_label("Person").unwrap();
311        assert_eq!(persons.len(), 2);
312        assert!(persons.contains(&Vid::new(1)));
313        assert!(persons.contains(&Vid::new(2)));
314    }
315
316    #[test]
317    fn test_get_vids_with_all_labels() {
318        let mut index = VidLabelsIndex::new();
319
320        index.insert(Vid::new(1), vec!["Person".to_string()]);
321        index.insert(
322            Vid::new(2),
323            vec!["Person".to_string(), "Employee".to_string()],
324        );
325        index.insert(
326            Vid::new(3),
327            vec![
328                "Person".to_string(),
329                "Employee".to_string(),
330                "Manager".to_string(),
331            ],
332        );
333
334        let result = index.get_vids_with_all_labels(&["Person", "Employee"]);
335        assert_eq!(result.len(), 2);
336        assert!(result.contains(&Vid::new(2)));
337        assert!(result.contains(&Vid::new(3)));
338    }
339
340    #[test]
341    fn test_add_remove_label() {
342        let mut index = VidLabelsIndex::new();
343        let vid = Vid::new(1);
344
345        index.insert(vid, vec!["Person".to_string()]);
346        assert!(index.has_label(vid, "Person"));
347
348        index.add_label(vid, "Employee".to_string());
349        assert!(index.has_label(vid, "Person"));
350        assert!(index.has_label(vid, "Employee"));
351
352        index.remove_label(vid, "Person");
353        assert!(!index.has_label(vid, "Person"));
354        assert!(index.has_label(vid, "Employee"));
355    }
356
357    #[test]
358    fn test_remove_vid() {
359        let mut index = VidLabelsIndex::new();
360        let vid = Vid::new(1);
361
362        index.insert(vid, vec!["Person".to_string(), "Employee".to_string()]);
363        assert_eq!(index.len(), 1);
364
365        index.remove_vid(vid);
366        assert_eq!(index.len(), 0);
367        assert!(index.get_labels(vid).is_none());
368        assert!(
369            index.get_vids_with_label("Person").is_none()
370                || index.get_vids_with_label("Person").unwrap().is_empty()
371        );
372    }
373}