// sqlite_knowledge_graph/lib.rs

//! SQLite-based Knowledge Graph Library
//!
//! This library provides a knowledge graph implementation built on SQLite with support for:
//! - Entities with typed properties
//! - Relations between entities with weights
//! - Vector embeddings for semantic search
//! - Custom SQLite functions for direct SQL operations
//! - RAG (Retrieval-Augmented Generation) query functions
//! - Graph algorithms (PageRank, Louvain, Connected Components)
//!
//! ## SQLite Extension
//!
//! This crate can be compiled as a SQLite loadable extension. The extension must be
//! loaded in the same `sqlite3` session that uses it, so both commands are passed to a
//! single invocation (the `.dylib` name shown is the macOS form; Linux builds produce `.so`):
//! ```bash
//! cargo build --release
//! sqlite3 db.db ".load ./target/release/libsqlite_knowledge_graph.dylib" "SELECT kg_version();"
//! ```
19
20pub mod algorithms;
21pub mod error;
22pub mod extension;
23pub mod functions;
24pub mod graph;
25pub mod migrate;
26pub mod schema;
27pub mod vector;
28
29pub use algorithms::{
30    analyze_graph, connected_components, louvain_communities, pagerank, CommunityResult,
31    PageRankConfig,
32};
33pub use error::{Error, Result};
34pub use extension::sqlite3_sqlite_knowledge_graph_init;
35pub use functions::register_functions;
36pub use graph::{Direction, GraphStats, PathStep, TraversalNode, TraversalPath, TraversalQuery};
37pub use graph::{Entity, Neighbor, Relation};
38pub use migrate::{
39    build_relationships, migrate_all, migrate_papers, migrate_skills, MigrationStats,
40};
41pub use schema::{create_schema, schema_exists};
42pub use vector::{cosine_similarity, SearchResult, VectorStore};
43pub use vector::{TurboQuantConfig, TurboQuantIndex, TurboQuantStats};
44
45use rusqlite::Connection;
46use serde::{Deserialize, Serialize};
47
48/// Semantic search result with entity information.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct SearchResultWithEntity {
51    pub entity: Entity,
52    pub similarity: f32,
53}
54
55/// Graph context for an entity (root + neighbors).
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct GraphContext {
58    pub root_entity: Entity,
59    pub neighbors: Vec<Neighbor>,
60}
61
62/// Hybrid search result combining semantic similarity and graph context.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct HybridSearchResult {
65    pub entity: Entity,
66    pub similarity: f32,
67    pub context: Option<GraphContext>,
68}
69
70/// Knowledge Graph Manager - main entry point for the library.
71#[derive(Debug)]
72pub struct KnowledgeGraph {
73    conn: Connection,
74}
75
76impl KnowledgeGraph {
77    /// Open a new knowledge graph database connection.
78    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
79        let conn = Connection::open(path)?;
80
81        // Enable foreign keys
82        conn.execute("PRAGMA foreign_keys = ON", [])?;
83
84        // Create schema if not exists
85        if !schema_exists(&conn)? {
86            create_schema(&conn)?;
87        }
88
89        // Register custom functions
90        register_functions(&conn)?;
91
92        Ok(Self { conn })
93    }
94
95    /// Open an in-memory knowledge graph (useful for testing).
96    pub fn open_in_memory() -> Result<Self> {
97        let conn = Connection::open_in_memory()?;
98
99        // Enable foreign keys
100        conn.execute("PRAGMA foreign_keys = ON", [])?;
101
102        // Create schema
103        create_schema(&conn)?;
104
105        // Register custom functions
106        register_functions(&conn)?;
107
108        Ok(Self { conn })
109    }
110
111    /// Get a reference to the underlying SQLite connection.
112    pub fn connection(&self) -> &Connection {
113        &self.conn
114    }
115
116    /// Begin a transaction for batch operations.
117    pub fn transaction(&self) -> Result<rusqlite::Transaction<'_>> {
118        Ok(self.conn.unchecked_transaction()?)
119    }
120
121    /// Insert an entity into the knowledge graph.
122    pub fn insert_entity(&self, entity: &Entity) -> Result<i64> {
123        graph::insert_entity(&self.conn, entity)
124    }
125
126    /// Get an entity by ID.
127    pub fn get_entity(&self, id: i64) -> Result<Entity> {
128        graph::get_entity(&self.conn, id)
129    }
130
131    /// List entities with optional filtering.
132    pub fn list_entities(
133        &self,
134        entity_type: Option<&str>,
135        limit: Option<i64>,
136    ) -> Result<Vec<Entity>> {
137        graph::list_entities(&self.conn, entity_type, limit)
138    }
139
140    /// Update an entity.
141    pub fn update_entity(&self, entity: &Entity) -> Result<()> {
142        graph::update_entity(&self.conn, entity)
143    }
144
145    /// Delete an entity.
146    pub fn delete_entity(&self, id: i64) -> Result<()> {
147        graph::delete_entity(&self.conn, id)
148    }
149
150    /// Insert a relation between entities.
151    pub fn insert_relation(&self, relation: &Relation) -> Result<i64> {
152        graph::insert_relation(&self.conn, relation)
153    }
154
155    /// Get neighbors of an entity using BFS traversal.
156    pub fn get_neighbors(&self, entity_id: i64, depth: u32) -> Result<Vec<Neighbor>> {
157        graph::get_neighbors(&self.conn, entity_id, depth)
158    }
159
160    /// Insert a vector embedding for an entity.
161    pub fn insert_vector(&self, entity_id: i64, vector: Vec<f32>) -> Result<()> {
162        let store = VectorStore::new();
163        store.insert_vector(&self.conn, entity_id, vector)
164    }
165
166    /// Search for similar entities using vector embeddings.
167    pub fn search_vectors(&self, query: Vec<f32>, k: usize) -> Result<Vec<SearchResult>> {
168        let store = VectorStore::new();
169        store.search_vectors(&self.conn, query, k)
170    }
171
172    // ========== TurboQuant Vector Index ==========
173
174    /// Create a TurboQuant index for fast approximate nearest neighbor search.
175    ///
176    /// TurboQuant provides:
177    /// - Instant indexing (no training required)
178    /// - 6x memory compression
179    /// - Near-zero accuracy loss
180    ///
181    /// # Arguments
182    /// * `config` - Optional configuration (uses defaults if None)
183    ///
184    /// # Example
185    /// ```ignore
186    /// let config = TurboQuantConfig {
187    ///     dimension: 384,
188    ///     bit_width: 3,
189    ///     seed: 42,
190    /// };
191    /// let mut index = kg.create_turboquant_index(Some(config))?;
192    ///
193    /// // Add vectors to index
194    /// for (entity_id, vector) in all_vectors {
195    ///     index.add_vector(entity_id, &vector)?;
196    /// }
197    ///
198    /// // Fast search
199    /// let results = index.search(&query_vector, 10)?;
200    /// ```
201    pub fn create_turboquant_index(
202        &self,
203        config: Option<TurboQuantConfig>,
204    ) -> Result<TurboQuantIndex> {
205        let config = config.unwrap_or_default();
206
207        TurboQuantIndex::new(config)
208    }
209
210    /// Build a TurboQuant index from all existing vectors in the database.
211    /// This is a convenience method that loads all vectors and indexes them.
212    pub fn build_turboquant_index(
213        &self,
214        config: Option<TurboQuantConfig>,
215    ) -> Result<TurboQuantIndex> {
216        // Get dimension from first vector
217        let dimension = self.get_vector_dimension()?.unwrap_or(384);
218
219        let config = config.unwrap_or(TurboQuantConfig {
220            dimension,
221            bit_width: 3,
222            seed: 42,
223        });
224
225        let mut index = TurboQuantIndex::new(config)?;
226
227        // Load all vectors
228        let vectors = self.load_all_vectors()?;
229
230        for (entity_id, vector) in vectors {
231            index.add_vector(entity_id, &vector)?;
232        }
233
234        Ok(index)
235    }
236
237    /// Get the dimension of stored vectors (if any exist).
238    fn get_vector_dimension(&self) -> Result<Option<usize>> {
239        let result = self
240            .conn
241            .query_row("SELECT dimension FROM kg_vectors LIMIT 1", [], |row| {
242                row.get::<_, i64>(0)
243            });
244
245        match result {
246            Ok(dim) => Ok(Some(dim as usize)),
247            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
248            Err(e) => Err(e.into()),
249        }
250    }
251
252    /// Load all vectors from the database.
253    fn load_all_vectors(&self) -> Result<Vec<(i64, Vec<f32>)>> {
254        let mut stmt = self
255            .conn
256            .prepare("SELECT entity_id, vector, dimension FROM kg_vectors")?;
257
258        let rows = stmt.query_map([], |row| {
259            let entity_id: i64 = row.get(0)?;
260            let vector_blob: Vec<u8> = row.get(1)?;
261            let dimension: i64 = row.get(2)?;
262
263            let mut vector = Vec::with_capacity(dimension as usize);
264            for chunk in vector_blob.chunks_exact(4) {
265                let bytes: [u8; 4] = chunk.try_into().unwrap();
266                vector.push(f32::from_le_bytes(bytes));
267            }
268
269            Ok((entity_id, vector))
270        })?;
271
272        let mut vectors = Vec::new();
273        for row in rows {
274            vectors.push(row?);
275        }
276
277        Ok(vectors)
278    }
279
280    // ========== RAG Query Functions ==========
281
282    /// Semantic search using vector embeddings.
283    /// Returns entities sorted by similarity score.
284    pub fn kg_semantic_search(
285        &self,
286        query_embedding: Vec<f32>,
287        k: usize,
288    ) -> Result<Vec<SearchResultWithEntity>> {
289        let results = self.search_vectors(query_embedding, k)?;
290
291        let mut entities_with_results = Vec::new();
292        for result in results {
293            let entity = self.get_entity(result.entity_id)?;
294            entities_with_results.push(SearchResultWithEntity {
295                entity,
296                similarity: result.similarity,
297            });
298        }
299
300        Ok(entities_with_results)
301    }
302
303    /// Get context around an entity using graph traversal.
304    /// Returns neighbors up to the specified depth.
305    pub fn kg_get_context(&self, entity_id: i64, depth: u32) -> Result<GraphContext> {
306        let root_entity = self.get_entity(entity_id)?;
307        let neighbors = self.get_neighbors(entity_id, depth)?;
308
309        Ok(GraphContext {
310            root_entity,
311            neighbors,
312        })
313    }
314
315    /// Hybrid search combining semantic search and graph context.
316    /// Performs semantic search first, then retrieves context for top-k results.
317    pub fn kg_hybrid_search(
318        &self,
319        _query_text: &str,
320        query_embedding: Vec<f32>,
321        k: usize,
322    ) -> Result<Vec<HybridSearchResult>> {
323        let semantic_results = self.kg_semantic_search(query_embedding, k)?;
324
325        let mut hybrid_results = Vec::new();
326        for result in semantic_results.iter() {
327            let entity_id = result.entity.id.ok_or(Error::EntityNotFound(0))?;
328            let context = self.kg_get_context(entity_id, 1)?; // Depth 1 context
329
330            hybrid_results.push(HybridSearchResult {
331                entity: result.entity.clone(),
332                similarity: result.similarity,
333                context: Some(context),
334            });
335        }
336
337        Ok(hybrid_results)
338    }
339
340    // ========== Graph Traversal Functions ==========
341
342    /// BFS traversal from a starting entity.
343    /// Returns all reachable entities within max_depth with depth information.
344    pub fn kg_bfs_traversal(
345        &self,
346        start_id: i64,
347        direction: Direction,
348        max_depth: u32,
349    ) -> Result<Vec<TraversalNode>> {
350        let query = TraversalQuery {
351            direction,
352            max_depth,
353            ..Default::default()
354        };
355        graph::bfs_traversal(&self.conn, start_id, query)
356    }
357
358    /// DFS traversal from a starting entity.
359    /// Returns all reachable entities within max_depth.
360    pub fn kg_dfs_traversal(
361        &self,
362        start_id: i64,
363        direction: Direction,
364        max_depth: u32,
365    ) -> Result<Vec<TraversalNode>> {
366        let query = TraversalQuery {
367            direction,
368            max_depth,
369            ..Default::default()
370        };
371        graph::dfs_traversal(&self.conn, start_id, query)
372    }
373
374    /// Find shortest path between two entities using BFS.
375    /// Returns the path with all intermediate steps (if exists).
376    pub fn kg_shortest_path(
377        &self,
378        from_id: i64,
379        to_id: i64,
380        max_depth: u32,
381    ) -> Result<Option<TraversalPath>> {
382        graph::find_shortest_path(&self.conn, from_id, to_id, max_depth)
383    }
384
385    /// Compute graph statistics.
386    pub fn kg_graph_stats(&self) -> Result<GraphStats> {
387        graph::compute_graph_stats(&self.conn)
388    }
389
390    // ========== Graph Algorithms ==========
391
392    /// Compute PageRank scores for all entities.
393    /// Returns a vector of (entity_id, score) sorted by score descending.
394    pub fn kg_pagerank(&self, config: Option<PageRankConfig>) -> Result<Vec<(i64, f64)>> {
395        algorithms::pagerank(&self.conn, config.unwrap_or_default())
396    }
397
398    /// Detect communities using Louvain algorithm.
399    /// Returns community memberships and modularity score.
400    pub fn kg_louvain(&self) -> Result<CommunityResult> {
401        algorithms::louvain_communities(&self.conn)
402    }
403
404    /// Find connected components in the graph.
405    /// Returns a list of components, each being a list of entity IDs.
406    pub fn kg_connected_components(&self) -> Result<Vec<Vec<i64>>> {
407        algorithms::connected_components(&self.conn)
408    }
409
410    /// Run full graph analysis (PageRank + Louvain + Connected Components).
411    pub fn kg_analyze(&self) -> Result<algorithms::GraphAnalysis> {
412        algorithms::analyze_graph(&self.conn)
413    }
414}
415
#[cfg(test)]
mod tests {
    use super::*;

    /// Insert a `paper` entity with the given title and return its id.
    fn add_paper(graph: &KnowledgeGraph, title: &str) -> i64 {
        graph.insert_entity(&Entity::new("paper", title)).unwrap()
    }

    /// A fresh in-memory graph must already have its schema in place.
    #[test]
    fn test_open_in_memory() {
        let graph = KnowledgeGraph::open_in_memory().unwrap();
        let has_schema = schema_exists(graph.connection()).unwrap();
        assert!(has_schema);
    }

    /// Walk one entity through create, read, list, update and delete.
    #[test]
    fn test_crud_operations() {
        let graph = KnowledgeGraph::open_in_memory().unwrap();

        // Create
        let mut paper = Entity::new("paper", "Test Paper");
        paper.set_property("author", serde_json::json!("John Doe"));
        let paper_id = graph.insert_entity(&paper).unwrap();

        // Read
        let fetched = graph.get_entity(paper_id).unwrap();
        assert_eq!(fetched.name, "Test Paper");

        // List (filtered by type)
        let papers = graph.list_entities(Some("paper"), None).unwrap();
        assert_eq!(papers.len(), 1);

        // Update
        let mut revised = fetched.clone();
        revised.set_property("year", serde_json::json!(2024));
        graph.update_entity(&revised).unwrap();

        // Delete, then confirm nothing is left
        graph.delete_entity(paper_id).unwrap();
        let remaining = graph.list_entities(None, None).unwrap();
        assert_eq!(remaining.len(), 0);
    }

    /// Neighbor lookup over a three-node citation chain grows with depth.
    #[test]
    fn test_graph_traversal() {
        let graph = KnowledgeGraph::open_in_memory().unwrap();

        // Chain: first -> second -> third
        let first = add_paper(&graph, "Paper 1");
        let second = add_paper(&graph, "Paper 2");
        let third = add_paper(&graph, "Paper 3");

        let first_cites_second = Relation::new(first, second, "cites", 0.8).unwrap();
        graph.insert_relation(&first_cites_second).unwrap();
        let second_cites_third = Relation::new(second, third, "cites", 0.9).unwrap();
        graph.insert_relation(&second_cites_third).unwrap();

        // One hop reaches a single neighbor
        let one_hop = graph.get_neighbors(first, 1).unwrap();
        assert_eq!(one_hop.len(), 1);

        // Two hops reach both downstream papers
        let two_hops = graph.get_neighbors(first, 2).unwrap();
        assert_eq!(two_hops.len(), 2);
    }

    /// The entity whose vector equals the query must rank first.
    #[test]
    fn test_vector_search() {
        let graph = KnowledgeGraph::open_in_memory().unwrap();

        let first = add_paper(&graph, "Paper 1");
        let second = add_paper(&graph, "Paper 2");

        // Two orthogonal unit vectors
        graph.insert_vector(first, vec![1.0, 0.0, 0.0]).unwrap();
        graph.insert_vector(second, vec![0.0, 1.0, 0.0]).unwrap();

        let hits = graph.search_vectors(vec![1.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].entity_id, first);
    }
}