Skip to main content

reddb_server/storage/unified/
index.rs

1//! Integrated Index System
2//!
3//! Combines multiple index types for optimal multi-modal queries:
4//! - HNSW: Vector similarity search
5//! - B-tree: Metadata range queries (via MetadataStorage)
6//! - Inverted: Full-text search on content
7//!
8//! # Architecture
9//!
10//! ```text
11//! ┌─────────────────────────────────────────────────────────────┐
12//! │                   IntegratedIndexManager                     │
13//! ├─────────────────────────────────────────────────────────────┤
14//! │  ┌───────────┐  ┌───────────────┐  ┌─────────────────────┐  │
15//! │  │   HNSW    │  │  InvertedIndex│  │  MetadataIndex      │  │
16//! │  │  (vector) │  │  (full-text)  │  │  (B-tree ranges)    │  │
17//! │  └─────┬─────┘  └───────┬───────┘  └──────────┬──────────┘  │
18//! │        │                │                      │             │
19//! │        └────────────────┼──────────────────────┘             │
20//! │                         │                                    │
21//! │              ┌──────────▼──────────┐                        │
22//! │              │   Unified Query     │                        │
23//! │              │   Executor          │                        │
24//! │              └─────────────────────┘                        │
25//! └─────────────────────────────────────────────────────────────┘
26//! ```
27
28use std::collections::{BTreeMap, HashMap, HashSet};
29use std::time::{SystemTime, UNIX_EPOCH};
30
31use parking_lot::RwLock;
32
33use super::entity::EntityId;
34use super::metadata::{Metadata, MetadataStorage, MetadataValue};
35
36// ============================================================================
37// Graph Adjacency Index
38// ============================================================================
39
40/// Edge direction for adjacency lookups
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
42pub enum EdgeDirection {
43    Outgoing,
44    Incoming,
45    Both,
46}
47
48/// An adjacency entry representing a single edge
49#[derive(Debug, Clone)]
50pub struct AdjacencyEntry {
51    /// The edge entity ID
52    pub edge_id: EntityId,
53    /// The target node (for outgoing) or source node (for incoming)
54    pub neighbor_id: EntityId,
55    /// Edge label/type
56    pub label: String,
57    /// Edge weight (optional, default 1.0)
58    pub weight: f32,
59}
60
61/// Graph adjacency index for fast edge/neighbor lookups
62///
63/// Supports O(1) lookups for:
64/// - All outgoing edges from a node
65/// - All incoming edges to a node
66/// - Edges filtered by label
67pub struct GraphAdjacencyIndex {
68    /// Node → outgoing edges: HashMap<source_id, Vec<AdjacencyEntry>>
69    outgoing: RwLock<HashMap<EntityId, Vec<AdjacencyEntry>>>,
70    /// Node → incoming edges: HashMap<target_id, Vec<AdjacencyEntry>>
71    incoming: RwLock<HashMap<EntityId, Vec<AdjacencyEntry>>>,
72    /// Label → edge IDs for label-based filtering
73    by_label: RwLock<HashMap<String, HashSet<EntityId>>>,
74    /// Edge count for stats
75    edge_count: RwLock<usize>,
76    /// Node count (unique nodes with edges)
77    node_count: RwLock<usize>,
78}
79
80impl GraphAdjacencyIndex {
81    /// Create a new empty adjacency index
82    pub fn new() -> Self {
83        Self {
84            outgoing: RwLock::new(HashMap::new()),
85            incoming: RwLock::new(HashMap::new()),
86            by_label: RwLock::new(HashMap::new()),
87            edge_count: RwLock::new(0),
88            node_count: RwLock::new(0),
89        }
90    }
91
92    /// Index an edge for fast lookups
93    pub fn index_edge(
94        &self,
95        edge_id: EntityId,
96        source_id: EntityId,
97        target_id: EntityId,
98        label: &str,
99        weight: f32,
100    ) {
101        // Add to outgoing adjacency
102        {
103            let mut outgoing = self.outgoing.write();
104            let entry = AdjacencyEntry {
105                edge_id,
106                neighbor_id: target_id,
107                label: label.to_string(),
108                weight,
109            };
110            outgoing.entry(source_id).or_default().push(entry);
111        }
112
113        // Add to incoming adjacency
114        {
115            let mut incoming = self.incoming.write();
116            let entry = AdjacencyEntry {
117                edge_id,
118                neighbor_id: source_id,
119                label: label.to_string(),
120                weight,
121            };
122            incoming.entry(target_id).or_default().push(entry);
123        }
124
125        // Add to label index
126        {
127            let mut by_label = self.by_label.write();
128            by_label
129                .entry(label.to_string())
130                .or_default()
131                .insert(edge_id);
132        }
133
134        // Update counts
135        {
136            let mut count = self.edge_count.write();
137            *count += 1;
138        }
139
140        // Update node count (track unique nodes)
141        self.update_node_count();
142    }
143
144    /// Remove an edge from the index
145    pub fn remove_edge(&self, edge_id: EntityId) {
146        // Remove from outgoing
147        {
148            let mut outgoing = self.outgoing.write();
149            for entries in outgoing.values_mut() {
150                entries.retain(|e| e.edge_id != edge_id);
151            }
152        }
153
154        // Remove from incoming
155        {
156            let mut incoming = self.incoming.write();
157            for entries in incoming.values_mut() {
158                entries.retain(|e| e.edge_id != edge_id);
159            }
160        }
161
162        // Remove from label index
163        {
164            let mut by_label = self.by_label.write();
165            for edges in by_label.values_mut() {
166                edges.remove(&edge_id);
167            }
168        }
169
170        // Update counts
171        {
172            let mut count = self.edge_count.write();
173            *count = count.saturating_sub(1);
174        }
175    }
176
177    /// Get neighbors in a direction (optionally filtered by label)
178    pub fn get_neighbors(
179        &self,
180        node_id: EntityId,
181        direction: EdgeDirection,
182        label_filter: Option<&str>,
183    ) -> Vec<AdjacencyEntry> {
184        let mut results = Vec::new();
185
186        if matches!(direction, EdgeDirection::Outgoing | EdgeDirection::Both) {
187            let outgoing = self.outgoing.read();
188            if let Some(entries) = outgoing.get(&node_id) {
189                for entry in entries {
190                    if label_filter.is_none_or(|l| entry.label == l) {
191                        results.push(entry.clone());
192                    }
193                }
194            }
195        }
196
197        if matches!(direction, EdgeDirection::Incoming | EdgeDirection::Both) {
198            let incoming = self.incoming.read();
199            if let Some(entries) = incoming.get(&node_id) {
200                for entry in entries {
201                    if label_filter.is_none_or(|l| entry.label == l) {
202                        results.push(entry.clone());
203                    }
204                }
205            }
206        }
207
208        results
209    }
210
211    /// Get all edges with a specific label
212    pub fn get_edges_by_label(&self, label: &str) -> Vec<EntityId> {
213        let idx = self.by_label.read();
214        idx.get(label)
215            .map(|s| s.iter().copied().collect())
216            .unwrap_or_default()
217    }
218
219    /// Get outgoing degree of a node
220    pub fn out_degree(&self, node_id: EntityId) -> usize {
221        self.outgoing
222            .read()
223            .get(&node_id)
224            .map(|v| v.len())
225            .unwrap_or(0)
226    }
227
228    /// Get incoming degree of a node
229    pub fn in_degree(&self, node_id: EntityId) -> usize {
230        self.incoming
231            .read()
232            .get(&node_id)
233            .map(|v| v.len())
234            .unwrap_or(0)
235    }
236
237    /// Get total degree of a node
238    pub fn degree(&self, node_id: EntityId) -> usize {
239        self.out_degree(node_id) + self.in_degree(node_id)
240    }
241
242    /// Get edge count
243    pub fn edge_count(&self) -> usize {
244        *self.edge_count.read()
245    }
246
247    /// Get node count
248    pub fn node_count(&self) -> usize {
249        *self.node_count.read()
250    }
251
252    /// Clear the entire index
253    pub fn clear(&self) {
254        self.outgoing.write().clear();
255        self.incoming.write().clear();
256        self.by_label.write().clear();
257        *self.edge_count.write() = 0;
258        *self.node_count.write() = 0;
259    }
260
261    fn update_node_count(&self) {
262        let out_nodes: HashSet<_> = self.outgoing.read().keys().copied().collect();
263        let in_nodes: HashSet<_> = self.incoming.read().keys().copied().collect();
264
265        let total: HashSet<_> = out_nodes.union(&in_nodes).collect();
266        *self.node_count.write() = total.len();
267    }
268}
269
270impl Default for GraphAdjacencyIndex {
271    fn default() -> Self {
272        Self::new()
273    }
274}
275
276// ============================================================================
277// Inverted Index for Full-Text Search
278// ============================================================================
279
280/// Token with position information for phrase queries
281#[derive(Debug, Clone)]
282pub struct TokenPosition {
283    pub entity_id: EntityId,
284    pub field: String,
285    pub position: u32,
286}
287
288/// A posting list entry
289#[derive(Debug, Clone)]
290pub struct PostingEntry {
291    pub entity_id: EntityId,
292    pub collection: String,
293    pub field: String,
294    pub positions: Vec<u32>,
295    pub term_frequency: f32,
296}
297
298/// Inverted index for full-text search
299pub struct InvertedIndex {
300    /// Term → Posting list
301    index: RwLock<BTreeMap<String, Vec<PostingEntry>>>,
302    /// Document frequencies for TF-IDF
303    doc_count: RwLock<usize>,
304    /// Indexed fields per collection
305    indexed_fields: RwLock<HashMap<String, HashSet<String>>>,
306}
307
308impl InvertedIndex {
309    /// Create a new empty inverted index
310    pub fn new() -> Self {
311        Self {
312            index: RwLock::new(BTreeMap::new()),
313            doc_count: RwLock::new(0),
314            indexed_fields: RwLock::new(HashMap::new()),
315        }
316    }
317
318    /// Configure which fields to index for a collection
319    pub fn add_indexed_field(&self, collection: &str, field: &str) {
320        self.indexed_fields
321            .write()
322            .entry(collection.to_string())
323            .or_default()
324            .insert(field.to_string());
325    }
326
327    /// Index a document's text content
328    pub fn index_document(
329        &self,
330        collection: &str,
331        entity_id: EntityId,
332        field: &str,
333        content: &str,
334    ) {
335        let tokens = self.tokenize(content);
336        let term_count = tokens.len() as f32;
337
338        // Count term frequencies
339        let mut term_freqs: HashMap<String, Vec<u32>> = HashMap::new();
340        for (position, token) in tokens.iter().enumerate() {
341            term_freqs
342                .entry(token.clone())
343                .or_default()
344                .push(position as u32);
345        }
346
347        // Add to index
348        {
349            let mut index = self.index.write();
350            for (term, positions) in term_freqs {
351                let tf = positions.len() as f32 / term_count.max(1.0);
352
353                let entry = PostingEntry {
354                    entity_id,
355                    collection: collection.to_string(),
356                    field: field.to_string(),
357                    positions,
358                    term_frequency: tf,
359                };
360
361                index.entry(term).or_default().push(entry);
362            }
363        }
364
365        // Update doc count
366        *self.doc_count.write() += 1;
367    }
368
369    /// Remove a document from the index
370    pub fn remove_document(&self, entity_id: EntityId) {
371        let mut index = self.index.write();
372        for postings in index.values_mut() {
373            postings.retain(|p| p.entity_id != entity_id);
374        }
375    }
376
377    /// Search for documents containing all terms (AND query)
378    pub fn search(&self, query: &str, limit: usize) -> Vec<TextSearchResult> {
379        let terms = self.tokenize(query);
380        if terms.is_empty() {
381            return Vec::new();
382        }
383
384        let index = self.index.read();
385
386        let doc_count = *self.doc_count.read();
387
388        // Get posting lists for all terms
389        let mut term_postings: Vec<&Vec<PostingEntry>> = Vec::new();
390        for term in &terms {
391            if let Some(postings) = index.get(term) {
392                term_postings.push(postings);
393            } else {
394                // Term not found, AND query fails
395                return Vec::new();
396            }
397        }
398
399        // Find documents containing all terms
400        let mut scores: HashMap<EntityId, f32> = HashMap::new();
401
402        // Start with first term's documents
403        if let Some(first_postings) = term_postings.first() {
404            for posting in *first_postings {
405                let idf = ((doc_count as f32) / (first_postings.len() as f32 + 1.0)).ln();
406                scores.insert(posting.entity_id, posting.term_frequency * idf);
407            }
408        }
409
410        // Intersect with remaining terms
411        for postings in term_postings.iter().skip(1) {
412            let idf = ((doc_count as f32) / (postings.len() as f32 + 1.0)).ln();
413            let entities_in_term: HashSet<EntityId> =
414                postings.iter().map(|p| p.entity_id).collect();
415
416            // Keep only documents that have this term
417            scores.retain(|id, _| entities_in_term.contains(id));
418
419            // Add TF-IDF score
420            for posting in *postings {
421                if let Some(score) = scores.get_mut(&posting.entity_id) {
422                    *score += posting.term_frequency * idf;
423                }
424            }
425        }
426
427        // Convert to results and sort
428        let mut results: Vec<TextSearchResult> = scores
429            .into_iter()
430            .map(|(entity_id, score)| {
431                // Get collection from first posting
432                let collection = term_postings
433                    .first()
434                    .and_then(|p| p.iter().find(|e| e.entity_id == entity_id))
435                    .map(|p| p.collection.clone())
436                    .unwrap_or_default();
437
438                TextSearchResult {
439                    entity_id,
440                    collection,
441                    score,
442                    matched_terms: terms.clone(),
443                }
444            })
445            .collect();
446
447        results.sort_by(|a, b| {
448            b.score
449                .partial_cmp(&a.score)
450                .unwrap_or(std::cmp::Ordering::Equal)
451        });
452        results.truncate(limit);
453        results
454    }
455
456    /// Search with prefix matching (for autocomplete)
457    pub fn search_prefix(&self, prefix: &str, limit: usize) -> Vec<String> {
458        let prefix_lower = prefix.to_lowercase();
459
460        let index = self.index.read();
461
462        index
463            .range(prefix_lower.clone()..)
464            .take_while(|(term, _)| term.starts_with(&prefix_lower))
465            .take(limit)
466            .map(|(term, _)| term.clone())
467            .collect()
468    }
469
470    /// Simple tokenization - splits on whitespace and punctuation
471    fn tokenize(&self, text: &str) -> Vec<String> {
472        text.to_lowercase()
473            .split(|c: char| !c.is_alphanumeric())
474            .filter(|s| s.len() >= 2)
475            .map(|s| s.to_string())
476            .collect()
477    }
478}
479
480impl Default for InvertedIndex {
481    fn default() -> Self {
482        Self::new()
483    }
484}
485
486/// Result from a text search
487#[derive(Debug, Clone)]
488pub struct TextSearchResult {
489    pub entity_id: EntityId,
490    pub collection: String,
491    pub score: f32,
492    pub matched_terms: Vec<String>,
493}
494
495// ============================================================================
496// Integrated Index Manager
497// ============================================================================
498
499/// Configuration for the integrated index system
500#[derive(Debug, Clone)]
501pub struct IntegratedIndexConfig {
502    /// Enable HNSW indexing for vectors
503    pub enable_hnsw: bool,
504    /// Enable full-text indexing
505    pub enable_fulltext: bool,
506    /// Enable metadata indexing
507    pub enable_metadata: bool,
508    /// Enable graph adjacency indexing
509    pub enable_graph: bool,
510    /// HNSW M parameter (connections per node)
511    pub hnsw_m: usize,
512    /// HNSW ef_construction parameter
513    pub hnsw_ef_construction: usize,
514    /// HNSW ef_search parameter
515    pub hnsw_ef_search: usize,
516}
517
518impl Default for IntegratedIndexConfig {
519    fn default() -> Self {
520        Self {
521            enable_hnsw: true,
522            enable_fulltext: true,
523            enable_metadata: true,
524            enable_graph: true,
525            hnsw_m: 16,
526            hnsw_ef_construction: 100,
527            hnsw_ef_search: 50,
528        }
529    }
530}
531
532/// Statistics about the index system
533#[derive(Debug, Clone, Default)]
534pub struct IndexStats {
535    /// Number of indexed vectors
536    pub vector_count: usize,
537    /// Number of indexed documents (for full-text)
538    pub document_count: usize,
539    /// Number of unique terms
540    pub term_count: usize,
541    /// Number of metadata entries
542    pub metadata_entries: usize,
543    /// Number of graph nodes
544    pub graph_node_count: usize,
545    /// Number of graph edges
546    pub graph_edge_count: usize,
547    /// Index creation timestamp
548    pub created_at: u64,
549    /// Last update timestamp
550    pub updated_at: u64,
551}
552
553/// Types of indices that can be managed
554#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
555pub enum IndexType {
556    /// HNSW vector index
557    Hnsw,
558    /// Inverted full-text index
559    Fulltext,
560    /// B-tree metadata index
561    Metadata,
562    /// Graph adjacency index
563    Graph,
564}
565
566/// Status of an index operation
567#[derive(Debug, Clone)]
568pub enum IndexStatus {
569    /// Index is ready for use
570    Ready,
571    /// Index is being built
572    Building { progress: f32 },
573    /// Index needs rebuild
574    Stale,
575    /// Index is disabled
576    Disabled,
577    /// Index encountered an error
578    Error(String),
579}
580
581/// Index lifecycle event for tracking
582#[derive(Debug, Clone)]
583pub struct IndexEvent {
584    pub index_type: IndexType,
585    pub collection: Option<String>,
586    pub event: IndexEventKind,
587    pub timestamp: u64,
588}
589
590#[derive(Debug, Clone)]
591pub enum IndexEventKind {
592    Created,
593    Dropped,
594    Rebuilt,
595    Updated { entries_affected: usize },
596}
597
598/// Integrated Index Manager combining all index types
599pub struct IntegratedIndexManager {
600    /// Configuration
601    config: IntegratedIndexConfig,
602    /// Inverted index for full-text search
603    text_index: InvertedIndex,
604    /// Metadata storage (provides B-tree indexing)
605    metadata_index: RwLock<MetadataStorage>,
606    /// Collection-specific HNSW indices
607    /// Key: collection name, Value: (dimension, index data)
608    hnsw_indices: RwLock<HashMap<String, HnswIndexInfo>>,
609    /// Graph adjacency index
610    graph_index: GraphAdjacencyIndex,
611    /// Index status tracking
612    index_status: RwLock<HashMap<(IndexType, Option<String>), IndexStatus>>,
613    /// Lifecycle event history
614    event_history: RwLock<Vec<IndexEvent>>,
615    /// Creation timestamp
616    created_at: u64,
617}
618
619/// Information about an HNSW index for a collection
620struct HnswIndexInfo {
621    /// Vector dimension
622    dimension: usize,
623    /// Vectors stored (id → vector)
624    vectors: HashMap<EntityId, Vec<f32>>,
625    /// HNSW graph layers (simplified representation)
626    /// In production, would use the full HNSW implementation
627    entry_point: Option<EntityId>,
628}
629
630pub mod incremental;
631mod manager_impl;
632pub use incremental::{IncrementalIndexMaintainer, IndexDeltaOp, SecondaryIndexHandle};
633impl Default for IntegratedIndexManager {
634    fn default() -> Self {
635        Self::new()
636    }
637}
638
639/// Result from a vector similarity search
640#[derive(Debug, Clone)]
641pub struct VectorSearchResult {
642    pub entity_id: EntityId,
643    pub collection: String,
644    pub similarity: f32,
645}
646
647/// Filter for metadata queries
648#[derive(Debug, Clone)]
649pub enum MetadataQueryFilter {
650    /// Exact match
651    Equals(MetadataValue),
652    /// Range query (inclusive)
653    Range {
654        min: Option<MetadataValue>,
655        max: Option<MetadataValue>,
656    },
657    /// String contains
658    Contains(String),
659    /// Value in set
660    In(Vec<MetadataValue>),
661}
662
663// ============================================================================
664// Utility Functions
665// ============================================================================
666
667/// Compute cosine similarity between two vectors
668fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
669    if a.len() != b.len() || a.is_empty() {
670        return 0.0;
671    }
672
673    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
674    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
675    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
676
677    if norm_a == 0.0 || norm_b == 0.0 {
678        return 0.0;
679    }
680
681    dot / (norm_a * norm_b)
682}
683
684// ============================================================================
685// Tests
686// ============================================================================
687
688#[cfg(test)]
689mod tests {
690    use super::*;
691
692    #[test]
693    fn test_inverted_index_basic() {
694        let index = InvertedIndex::new();
695
696        index.index_document(
697            "docs",
698            EntityId(1),
699            "content",
700            "hello world rust programming",
701        );
702        index.index_document("docs", EntityId(2), "content", "rust is fast and safe");
703        index.index_document("docs", EntityId(3), "content", "python is easy to learn");
704
705        let results = index.search("rust", 10);
706        assert_eq!(results.len(), 2);
707        assert!(results.iter().any(|r| r.entity_id == EntityId(1)));
708        assert!(results.iter().any(|r| r.entity_id == EntityId(2)));
709    }
710
711    #[test]
712    fn test_inverted_index_and_query() {
713        let index = InvertedIndex::new();
714
715        index.index_document("docs", EntityId(1), "content", "rust programming language");
716        index.index_document("docs", EntityId(2), "content", "rust systems programming");
717        index.index_document(
718            "docs",
719            EntityId(3),
720            "content",
721            "python programming language",
722        );
723
724        // AND query: both "rust" and "programming"
725        let results = index.search("rust programming", 10);
726        assert_eq!(results.len(), 2);
727
728        // "language" appears in docs 1 and 3, "programming" in all three
729        // AND of "language programming" gives docs 1 and 3
730        let results = index.search("language programming", 10);
731        assert_eq!(results.len(), 2);
732    }
733
734    #[test]
735    fn test_prefix_search() {
736        let index = InvertedIndex::new();
737
738        index.index_document("docs", EntityId(1), "content", "programming rust rustacean");
739
740        let suggestions = index.search_prefix("rust", 10);
741        assert!(suggestions.contains(&"rust".to_string()));
742        assert!(suggestions.contains(&"rustacean".to_string()));
743    }
744
745    #[test]
746    fn test_vector_search() {
747        let manager = IntegratedIndexManager::new();
748
749        manager.index_vector("embeddings", EntityId(1), &[1.0, 0.0, 0.0]);
750        manager.index_vector("embeddings", EntityId(2), &[0.9, 0.1, 0.0]);
751        manager.index_vector("embeddings", EntityId(3), &[0.0, 1.0, 0.0]);
752
753        let results = manager.search_similar("embeddings", &[1.0, 0.0, 0.0], 2);
754        assert_eq!(results.len(), 2);
755        assert_eq!(results[0].entity_id, EntityId(1));
756        assert!(results[0].similarity > 0.99);
757    }
758
759    #[test]
760    fn test_cosine_similarity() {
761        let a = [1.0, 0.0, 0.0];
762        let b = [1.0, 0.0, 0.0];
763        assert!((cosine_similarity(&a, &b) - 1.0).abs() < 0.001);
764
765        let c = [0.0, 1.0, 0.0];
766        assert!(cosine_similarity(&a, &c).abs() < 0.001);
767    }
768
769    // =========================================================================
770    // Graph Adjacency Index Tests
771    // =========================================================================
772
773    #[test]
774    fn test_graph_adjacency_basic() {
775        let index = GraphAdjacencyIndex::new();
776
777        // Create nodes: 1 -> 2 -> 3
778        index.index_edge(EntityId(100), EntityId(1), EntityId(2), "KNOWS", 1.0);
779        index.index_edge(EntityId(101), EntityId(2), EntityId(3), "KNOWS", 1.0);
780
781        // Check outgoing from node 1
782        let neighbors = index.get_neighbors(EntityId(1), EdgeDirection::Outgoing, None);
783        assert_eq!(neighbors.len(), 1);
784        assert_eq!(neighbors[0].neighbor_id, EntityId(2));
785
786        // Check incoming to node 2
787        let neighbors = index.get_neighbors(EntityId(2), EdgeDirection::Incoming, None);
788        assert_eq!(neighbors.len(), 1);
789        assert_eq!(neighbors[0].neighbor_id, EntityId(1));
790
791        // Check both directions for node 2
792        let neighbors = index.get_neighbors(EntityId(2), EdgeDirection::Both, None);
793        assert_eq!(neighbors.len(), 2);
794    }
795
796    #[test]
797    fn test_graph_adjacency_label_filter() {
798        let index = GraphAdjacencyIndex::new();
799
800        // Create edges with different labels
801        index.index_edge(EntityId(100), EntityId(1), EntityId(2), "KNOWS", 1.0);
802        index.index_edge(EntityId(101), EntityId(1), EntityId(3), "WORKS_WITH", 1.0);
803        index.index_edge(EntityId(102), EntityId(1), EntityId(4), "KNOWS", 1.0);
804
805        // Filter by KNOWS label
806        let neighbors = index.get_neighbors(EntityId(1), EdgeDirection::Outgoing, Some("KNOWS"));
807        assert_eq!(neighbors.len(), 2);
808
809        // Filter by WORKS_WITH label
810        let neighbors =
811            index.get_neighbors(EntityId(1), EdgeDirection::Outgoing, Some("WORKS_WITH"));
812        assert_eq!(neighbors.len(), 1);
813        assert_eq!(neighbors[0].neighbor_id, EntityId(3));
814    }
815
816    #[test]
817    fn test_graph_adjacency_degree() {
818        let index = GraphAdjacencyIndex::new();
819
820        // Create a star graph: 1 -> [2, 3, 4, 5]
821        index.index_edge(EntityId(100), EntityId(1), EntityId(2), "LINK", 1.0);
822        index.index_edge(EntityId(101), EntityId(1), EntityId(3), "LINK", 1.0);
823        index.index_edge(EntityId(102), EntityId(1), EntityId(4), "LINK", 1.0);
824        index.index_edge(EntityId(103), EntityId(1), EntityId(5), "LINK", 1.0);
825
826        assert_eq!(index.out_degree(EntityId(1)), 4);
827        assert_eq!(index.in_degree(EntityId(1)), 0);
828        assert_eq!(index.degree(EntityId(1)), 4);
829
830        // Leaf nodes have in-degree 1
831        assert_eq!(index.in_degree(EntityId(2)), 1);
832        assert_eq!(index.out_degree(EntityId(2)), 0);
833    }
834
835    #[test]
836    fn test_graph_adjacency_edge_by_label() {
837        let index = GraphAdjacencyIndex::new();
838
839        index.index_edge(EntityId(100), EntityId(1), EntityId(2), "A", 1.0);
840        index.index_edge(EntityId(101), EntityId(2), EntityId(3), "B", 1.0);
841        index.index_edge(EntityId(102), EntityId(3), EntityId(4), "A", 1.0);
842
843        let edges_a = index.get_edges_by_label("A");
844        assert_eq!(edges_a.len(), 2);
845        assert!(edges_a.contains(&EntityId(100)));
846        assert!(edges_a.contains(&EntityId(102)));
847
848        let edges_b = index.get_edges_by_label("B");
849        assert_eq!(edges_b.len(), 1);
850        assert!(edges_b.contains(&EntityId(101)));
851    }
852
853    #[test]
854    fn test_graph_adjacency_remove() {
855        let index = GraphAdjacencyIndex::new();
856
857        index.index_edge(EntityId(100), EntityId(1), EntityId(2), "LINK", 1.0);
858        index.index_edge(EntityId(101), EntityId(1), EntityId(3), "LINK", 1.0);
859
860        assert_eq!(index.edge_count(), 2);
861
862        // Remove one edge
863        index.remove_edge(EntityId(100));
864
865        assert_eq!(index.edge_count(), 1);
866        let neighbors = index.get_neighbors(EntityId(1), EdgeDirection::Outgoing, None);
867        assert_eq!(neighbors.len(), 1);
868        assert_eq!(neighbors[0].neighbor_id, EntityId(3));
869    }
870
871    // =========================================================================
872    // Index Lifecycle Tests
873    // =========================================================================
874
875    #[test]
876    fn test_index_lifecycle_create_drop() {
877        let manager = IntegratedIndexManager::new();
878
879        // Create a new HNSW index for a specific collection
880        let result = manager.create_index(IndexType::Hnsw, Some("my_collection"));
881        assert!(result.is_ok());
882
883        // Check status
884        let status = manager.index_status(IndexType::Hnsw, Some("my_collection"));
885        assert!(matches!(status, IndexStatus::Ready));
886
887        // Drop the index
888        let result = manager.drop_index(IndexType::Hnsw, Some("my_collection"));
889        assert!(result.is_ok());
890    }
891
892    #[test]
893    fn test_index_lifecycle_rebuild() {
894        let manager = IntegratedIndexManager::new();
895
896        // Index some vectors
897        manager.index_vector("test", EntityId(1), &[1.0, 0.0, 0.0]);
898        manager.index_vector("test", EntityId(2), &[0.0, 1.0, 0.0]);
899
900        // Rebuild the index
901        let result = manager.rebuild_index(IndexType::Hnsw, Some("test"));
902        assert!(result.is_ok());
903
904        // Check status is ready
905        let status = manager.index_status(IndexType::Hnsw, Some("test"));
906        assert!(matches!(status, IndexStatus::Ready));
907    }
908
909    #[test]
910    fn test_index_stats_with_graph() {
911        let manager = IntegratedIndexManager::new();
912
913        // Add some edges
914        manager.index_edge(EntityId(100), EntityId(1), EntityId(2), "LINK", 1.0);
915        manager.index_edge(EntityId(101), EntityId(2), EntityId(3), "LINK", 1.0);
916
917        let stats = manager.stats();
918        assert_eq!(stats.graph_edge_count, 2);
919        assert!(stats.graph_node_count >= 2); // At least source nodes
920    }
921
922    #[test]
923    fn test_integrated_manager_graph_operations() {
924        let manager = IntegratedIndexManager::new();
925
926        // Index edges through the manager
927        manager.index_edge(EntityId(100), EntityId(1), EntityId(2), "KNOWS", 1.0);
928        manager.index_edge(EntityId(101), EntityId(2), EntityId(3), "KNOWS", 0.5);
929
930        // Query neighbors through the manager
931        let neighbors = manager.get_neighbors(EntityId(1), EdgeDirection::Outgoing, None);
932        assert_eq!(neighbors.len(), 1);
933        assert_eq!(neighbors[0].neighbor_id, EntityId(2));
934        assert_eq!(neighbors[0].weight, 1.0);
935
936        // Check degree
937        assert_eq!(manager.node_degree(EntityId(2), EdgeDirection::Both), 2);
938    }
939}